├── .gitignore
├── README.md
├── analyzer.py
├── archive.py
├── config.yaml
├── interactive.py
├── job.py
├── manager.py
├── plot_util.py
├── plotman.py
└── reporting.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
__pycache__
config.yaml
*.bak

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# `windows plotman`: an attempt to get plotman to work on Windows

THIS IS A BETA. Not ready for production use just yet; almost, but not quite there.

This is a tool for managing Chia plotting, based on Plotman (https://github.com/ericaltendorf/plotman).

This is a work in progress. Please see the original Linux version for additional info:

https://github.com/ericaltendorf/plotman

Please contact me at chia@ifhya.com, or as Wolfrage on Discord in the #chia channel (https://discord.gg/JESmva9R) - pop in and say hi!

## Known issues:

- Archiving hasn't been touched
- IO stats do not work on Windows
- Resizing the terminal messes up the curses display
- Common dir prefix abbreviation does not work, so the whole path is shown for temp and dest drives
- Various other issues

## Installation

Copy your `chia.exe` file to `chia2.exe` and use that for now! This avoids conflicts with plots started from the GUI or the command line; it will be adjusted once testing is complete.

This program requires `psutil`, `pyfakefs`, `texttable`, `windows-curses`, `pyreadline`, `pyyaml`, and `pypsutil`.

First, edit `manager.py` to hardcode the location of your `chia2.exe` (sorry, manual for now). Second, edit `config.yaml` for your settings. Third, provide feedback and help me with this, because I am soooo not a Python guy. I'm really, really, really not. Fourthly, send me pizza.

Run this command in Windows PowerShell: `python plotman.py interactive`

![image](https://user-images.githubusercontent.com/75458290/113492313-8c0ad680-94a4-11eb-93da-e93521dddde3.png)
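The dependencies can be installed with pip. A plausible one-liner (untested; this assumes the `pypsutil` entry above is covered by the `psutil` package):

```
pip install psutil pyfakefs texttable windows-curses pyreadline pyyaml
```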
--------------------------------------------------------------------------------
/analyzer.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3

import os
import re
import shutil
import statistics
import sys
import texttable as tt

import plot_util

class LogAnalyzer:
    # Map from key (e.g. logdir or the like) to (map from measurement name to list of values)
    all_measures = ['phase 1', 'phase 2', 'phase 3', 'phase 4', 'total time']

    def analyze(self, logfilenames):
        data = {}
        for logfilename in logfilenames:
            with open(logfilename, 'r') as f:
                key = 'x'  # TODO
                for line in f:
                    #
                    # Aggregation specification
                    #

                    # Starting phase 1/4: Forward Propagation into tmp files... Sun Nov 15 00:35:57 2020
                    # TODO: this only buckets by hour of day!!!
                    m = re.search(r'^Starting phase 1/4.*files.*\d\d (\d\d):\d\d:\d\d \d\d\d\d', line)
                    if m:
                        bucketsize = 2
                        hour = int(m.group(1))
                        hourbucket = int(hour / bucketsize)
                        # key += '-%02d-%02d' % (hourbucket * bucketsize, (hourbucket + 1) * bucketsize)

                    # Starting plotting progress into temporary dirs: /mnt/tmp/01 and /mnt/tmp/a
                    m = re.search(r'^Starting plotting.*dirs: (.*) and (.*)', line)
                    if False and m:
                        tmpdir = m.group(1)
                        # Hack to split data for backing hardware
                        tmpdir_idx = tmpdir[-2:]
                        if tmpdir_idx in ['00', '01']:
                            key += '-wd-raid'
                        if tmpdir_idx in ['02', '03', '04', '05']:
                            key += '-samsung'

                    #
                    # Data collection
                    #

                    # Time for phase 1 = 22796.7 seconds. CPU (98%) Tue Sep 29 17:57:19 2020
                    for phase in ['1', '2', '3', '4']:
                        m = re.search(r'^Time for phase ' + phase + r' = (\d+\.\d+) seconds\..*', line)
                        if m:
                            data.setdefault(key, {}).setdefault('phase ' + phase, []).append(float(m.group(1)))

                    # Total time = 49487.1 seconds. CPU (97.26%) Wed Sep 30 01:22:10 2020
                    m = re.search(r'^Total time = (\d+\.\d+) seconds.*', line)
                    if m:
                        data.setdefault(key, {}).setdefault('total time', []).append(float(m.group(1)))

        # Prepare report
        tab = tt.Texttable()
        headings = ['Key'] + self.all_measures
        tab.header(headings)

        # for logdir in logdirs:
        for key in data.keys():
            row = [key]
            for measure in self.all_measures:
                values = data.get(key, {}).get(measure, [])
                if len(values) > 1:
                    row.append('μ=%s σ=%s' % (
                        plot_util.human_format(statistics.mean(values), 1),
                        plot_util.human_format(statistics.stdev(values), 0)
                        ))
                elif len(values) == 1:
                    row.append(plot_util.human_format(values[0], 1))
                else:
                    row.append('N/A')
            tab.add_row(row)

        # os.popen('stty size') is not available on Windows; use shutil instead.
        (columns, rows) = shutil.get_terminal_size()
        tab.set_max_width(int(columns))
        s = tab.draw()
        print(s)

--------------------------------------------------------------------------------
/archive.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3

from datetime import datetime
import subprocess
import argparse
import math
import os
import psutil
import re
import random
import sys

import texttable as tt

import manager
import plot_util

# TODO: write-protect and delete-protect archived plots

def compute_priority(phase, gb_free, n_plots):
    # All these values are designed around dst buffer dirs of about
    # ~2TB size and containing k32 plots.  TODO: generalize, and
    # rewrite as a sort function.

    priority = 50

    # To avoid concurrent IO, we should not touch drives that
    # are about to receive a new plot
    if (phase == (3, 4)):
        priority -= 4
    elif (phase == (3, 5)):
        priority -= 8
    elif (phase == (3, 6)):
        priority -= 16
    elif (phase >= (3, 7)):
        priority -= 32

    # If a drive is getting full, we should prioritize it
    if (gb_free < 1000):
        priority += 1 + int((1000 - gb_free) / 100)
    if (gb_free < 500):
        priority += 1 + int((500 - gb_free) / 100)

    # Finally, least importantly, pick drives with more plots
    # over those with fewer.
    priority += n_plots

    return priority

def get_archdir_freebytes(arch_cfg):
    archdir_freebytes = {}
    df_cmd = ('ssh %s@%s df -BK | grep " %s/"' %
              (arch_cfg['rsyncd_user'], arch_cfg['rsyncd_host'], arch_cfg['rsyncd_path']))
    with subprocess.Popen(df_cmd, shell=True, stdout=subprocess.PIPE) as proc:
        for line in proc.stdout.readlines():
            fields = line.split()
            freebytes = int(fields[3][:-1]) * 1024  # Strip the final 'K'
            archdir = (fields[5]).decode('ascii')
            archdir_freebytes[archdir] = freebytes
    return archdir_freebytes

def rsync_dest(arch_cfg, arch_dir):
    rsync_path = arch_dir.replace(arch_cfg['rsyncd_path'], arch_cfg['rsyncd_module'])
    if rsync_path.startswith('/'):
        rsync_path = rsync_path[1:]  # Avoid dup slashes.  TODO: use path join?
    rsync_url = 'rsync://%s@%s:12000/%s' % (
        arch_cfg['rsyncd_user'], arch_cfg['rsyncd_host'], rsync_path)
    return rsync_url

# TODO: maybe consolidate with similar code in job.py?
def get_running_archive_jobs(arch_cfg):
    '''Look for running rsync jobs that seem to match the pattern we use for
       archiving them.  Return a list of PIDs of matching jobs.'''
    jobs = []
    dest = rsync_dest(arch_cfg, '/')
    for proc in psutil.process_iter(['pid', 'name']):
        if proc.name() == 'rsync':
            args = proc.cmdline()
            for arg in args:
                if arg.startswith(dest):
                    jobs.append(proc.pid)
    return jobs

def archive(dir_cfg, all_jobs):
    '''Configure one archive job.  Needs to know all jobs so it can avoid IO
       contention on the plotting dstdir drives.  Returns either (False, <reason>)
       if we should not execute an archive job or (True, <cmd>) with the archive
       command if we should.'''

    dstdirs = dir_cfg['dst']
    arch_cfg = dir_cfg['archive']

    dir2ph = manager.dstdirs_to_furthest_phase(all_jobs)
    best_priority = -100000000
    chosen_plot = None

    for d in dstdirs:
        ph = dir2ph.get(d, (0, 0))
        dir_plots = plot_util.list_k32_plots(d)
        gb_free = plot_util.df_b(d) / plot_util.GB
        n_plots = len(dir_plots)
        priority = compute_priority(ph, gb_free, n_plots)
        if priority >= best_priority and dir_plots:
            best_priority = priority
            chosen_plot = dir_plots[0]

    if not chosen_plot:
        return (False, 'No plots found')

    # TODO: sanity check that archive machine is available
    # TODO: filter drives mounted RO

    #
    # Pick first archive dir with sufficient space
    #
    archdir_freebytes = get_archdir_freebytes(arch_cfg)
    if not archdir_freebytes:
        return (False, 'No free archive dirs found.')

    archdir = ''
    for (d, space) in sorted(archdir_freebytes.items()):
        # TODO: make buffer configurable
        if space > 1.2 * plot_util.get_k32_plotsize():  # Leave a little buffer
            archdir = d
            freespace = space
            break

    if not archdir:
        return (False, 'No archive directories found with enough free space')

    msg = 'Found %s with ~%d GB free' % (archdir, freespace / plot_util.GB)

    bwlimit = arch_cfg['rsyncd_bwlimit']
    throttle_arg = ('--bwlimit=%d' % bwlimit) if bwlimit else ''
    cmd = ('rsync %s --remove-source-files -P %s %s' %
           (throttle_arg, chosen_plot, rsync_dest(arch_cfg, archdir)))

    return (True, cmd)

--------------------------------------------------------------------------------
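A quick sketch of how compute_priority() in archive.py above ranks dst drives, run from a Python shell in the plotman directory (all numbers are made up for illustration):

    import archive

    # Drive A: a plot job is in phase 3:6 (about to land a plot here),
    # 800 GB free, 10 plots already present.
    a = archive.compute_priority((3, 6), 800, 10)    # 50 - 16 + 3 + 10 = 47
    # Drive B: no inbound job, 1500 GB free, 4 plots present.
    b = archive.compute_priority((0, 0), 1500, 4)    # 50 + 4 = 54
    assert b > a    # drive B gets archived from first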
/config.yaml:
--------------------------------------------------------------------------------
# Where to plot and log.
directories:
  # One directory in which to store all plot job logs (the STDOUT/
  # STDERR of all plot jobs).  In order to monitor progress, plotman
  # reads these logs on a regular basis, so using a fast drive is
  # recommended.
  log: C:\chia

  # One or more directories to use as tmp dirs for plotting.  The
  # scheduler will use all of them and distribute jobs among them.
  # It assumes that IO is independent for each one (i.e., that each
  # one is on a different physical device).
  #
  # If multiple directories share a common prefix, reports will
  # abbreviate and show just the uniquely identifying suffix.
  tmp:
    - e:\1
    - h:\1
    - e:\2
    - i:\1
    - e:\3
    - h:\2
    - i:\2
    - e:\4

  # Optional: tmp2 directory.  If specified, will be passed to
  # chia plots create as -2.  Only one tmp2 directory is supported.
  # tmp2: /mnt/tmp/a

  # One or more directories; the scheduler will use all of them.
  # These again are presumed to be on independent physical devices,
  # so writes (plot jobs) and reads (archivals) can be scheduled
  # to minimize IO contention.
  dst:
    - z:\
    - f:\

# Archival configuration.  Optional; if you do not wish to run the
# archiving operation, comment this section out.
#
# Currently archival depends on an rsync daemon running on the remote
# host, and that the module is configured to match the local path.
# See code for details.
# archive:
#   rsyncd_module: plots
#   rsyncd_path: /plots
#   rsyncd_bwlimit: 80000  # Bandwidth limit in KB/s
#   rsyncd_host: myfarmer
#   rsyncd_user: chia

# Plotting scheduling parameters
scheduling:
  # Don't run a job on a particular temp dir until all existing jobs
  # have progressed at least this far.  Phase major corresponds to the
  # plot phase, phase minor corresponds to the table or table pair
  # in sequence.
  tmpdir_stagger_phase_major: 2
  tmpdir_stagger_phase_minor: 1

  # Don't run more than this many jobs at a time on a single temp dir.
  tmpdir_max_jobs: 1

  # Don't run any jobs (across all temp dirs) more often than this,
  # in minutes.
  global_stagger_m: 35

  # How often the daemon wakes to consider starting a new plot job,
  # in seconds.
  polling_time_s: 5

# Plotting parameters.  These are pass-through parameters to chia plots create.
# See documentation at
# https://github.com/Chia-Network/chia-blockchain/wiki/CLI-Commands-Reference#create
plotting:
  k: 32
  e: False          # Use -e plotting option
  n_threads: 3      # Threads per job
  n_buckets: 128    # Number of buckets to split data into
  job_buffer: 6500  # Per job memory, in MiB

--------------------------------------------------------------------------------
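A minimal sketch of how these settings are consumed (this mirrors the loading code in plotman.py and manager.py):

    import yaml

    with open('config.yaml', 'r') as ymlfile:
        cfg = yaml.load(ymlfile, Loader=yaml.FullLoader)

    sched_cfg = cfg['scheduling']
    stagger_s = int(sched_cfg['global_stagger_m']) * 60  # 35 min -> 2100 s between job starts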
/interactive.py:
--------------------------------------------------------------------------------
import curses
import datetime
import locale
import os
import subprocess
import threading
import yaml

#from unicurses import *

from job import Job
import archive
import manager
import reporting

class Log:
    def __init__(self):
        self.entries = []
        self.cur_pos = 0

    # TODO: store timestamp as actual timestamp indexing the messages
    def log(self, msg):
        '''Log the message and scroll to the end of the log'''
        ts = datetime.datetime.now().strftime('%m-%d %H:%M:%S')
        self.entries.append(ts + ' ' + msg)
        self.cur_pos = len(self.entries)

    def tail(self, num_entries):
        '''Return the entries at the end of the log.  Consider cur_slice() instead.'''
        return self.entries[-num_entries:]

    def shift_slice(self, offset):
        '''Positive shifts towards end, negative shifts towards beginning'''
        self.cur_pos = max(0, min(len(self.entries), self.cur_pos + offset))

    def shift_slice_to_end(self):
        self.cur_pos = len(self.entries)

    def get_cur_pos(self):
        return self.cur_pos

    def cur_slice(self, num_entries):
        '''Return num_entries log entries up to the current slice position'''
        return self.entries[max(0, self.cur_pos - num_entries) : self.cur_pos]

    def fill_log(self):
        '''Add a bunch of stuff to the log.  Useful for testing.'''
        for i in range(100):
            self.log('Log line %d' % i)

def plotting_status_msg(active, status):
    if active:
        return '(active) ' + status
    else:
        return '(inactive) ' + status

def archiving_status_msg(configured, active, status):
    if configured:
        if active:
            return '(active) ' + status
        else:
            return '(inactive) ' + status
    else:
        return '(not configured)'

def curses_main(stdscr):
    # TODO: figure out how to pass the configs in from plotman.py instead of
    # duplicating the code here.
    with open('config.yaml', 'r') as ymlfile:
        cfg = yaml.load(ymlfile, Loader=yaml.FullLoader)
    dir_cfg = cfg['directories']
    sched_cfg = cfg['scheduling']
    plotting_cfg = cfg['plotting']

    log = Log()

    plotting_active = True
    archiving_configured = 'archive' in dir_cfg
    archiving_active = archiving_configured

    (n_rows, n_cols) = map(int, stdscr.getmaxyx())

    # Page layout.  Currently requires at least ~40 rows.
    # TODO: make everything dynamically figure to best use available space
    header_height = 3
    jobs_height = 10
    dirs_height = 14
    logscreen_height = n_rows - (header_height + jobs_height + dirs_height)

    header_pos = 0
    jobs_pos = header_pos + header_height
    dirs_pos = jobs_pos + jobs_height
    logscreen_pos = dirs_pos + dirs_height

    plotting_status = ''   # TODO: rename these to _msg?
    archiving_status = ''

    refresh_period = int(sched_cfg['polling_time_s'])

    stdscr.nodelay(True)  # make getch() non-blocking
    stdscr.timeout(2000)

    header_win = curses.newwin(header_height, n_cols, header_pos, 0)
    log_win = curses.newwin(logscreen_height, n_cols, logscreen_pos, 0)
    jobs_win = curses.newwin(jobs_height, n_cols, jobs_pos, 0)
    dirs_win = curses.newwin(dirs_height, n_cols, dirs_pos, 0)

    jobs = Job.get_running_jobs(dir_cfg['log'])
    last_refresh = datetime.datetime.now()

    pressed_key = ''  # For debugging

    while True:

        # TODO: handle resizing.  Need to (1) figure out how to reliably get
        # the terminal size -- the recommended method doesn't seem to work:
        #     (n_rows, n_cols) = [int(v) for v in stdscr.getmaxyx()]
        # Consider instead:
        #     ...[int(v) for v in os.popen('stty size', 'r').read().split()]
        # and then (2) implement the logic to resize all the subwindows as above

        # stdscr.clear()
        linecap = n_cols - 1
        logscreen_height = n_rows - (header_height + jobs_height + dirs_height)

        elapsed = (datetime.datetime.now() - last_refresh).total_seconds()

        # A full refresh scans for and reads info for running jobs from
        # scratch (i.e., reread their logfiles).  Otherwise we'll only
        # initialize new jobs, and mostly rely on cached info.
        do_full_refresh = elapsed >= refresh_period

        if not do_full_refresh:
            jobs = Job.get_running_jobs(dir_cfg['log'], cached_jobs=jobs)
        else:
            last_refresh = datetime.datetime.now()
            jobs = Job.get_running_jobs(dir_cfg['log'])

            if plotting_active:
                (started, msg) = manager.maybe_start_new_plot(dir_cfg, sched_cfg, plotting_cfg)
                if started:
                    log.log(msg)
                    plotting_status = ''
                    jobs = Job.get_running_jobs(dir_cfg['log'], cached_jobs=jobs)
                else:
                    plotting_status = msg

            if archiving_configured and archiving_active:
                # Look for running archive jobs.  Be robust to finding more than one
                # even though the scheduler should only run one at a time.
                arch_jobs = archive.get_running_archive_jobs(dir_cfg['archive'])
                if arch_jobs:
                    archiving_status = 'pid: ' + ', '.join(map(str, arch_jobs))
                else:
                    (should_start, status_or_cmd) = archive.archive(dir_cfg, jobs)
                    if not should_start:
                        archiving_status = status_or_cmd
                    else:
                        cmd = status_or_cmd
                        log.log('Starting archive: ' + cmd)

                        # TODO: do something useful with output instead of DEVNULL
                        p = subprocess.Popen(cmd,
                                shell=True,
                                stdout=subprocess.DEVNULL,
                                stderr=subprocess.STDOUT,
                                start_new_session=True)

        # Directory prefixes, for abbreviation
        tmp_prefix = ''  # os.path.commonpath(dir_cfg['tmp'])
        dst_prefix = ''  # os.path.commonpath(dir_cfg['dst'])
        if archiving_configured:
            arch_prefix = dir_cfg['archive']['rsyncd_path']

        # Header
        header_win.addnstr(0, 0, 'Plotman', linecap, curses.A_BOLD)
        timestamp = datetime.datetime.now().strftime("%H:%M:%S")
        refresh_msg = "now" if do_full_refresh else f"{int(elapsed)}s/{refresh_period}"
        header_win.addnstr(f" {timestamp} (refresh {refresh_msg})", linecap)

        header_win.addnstr(' |  <P>lotting: ', linecap, curses.A_BOLD)
        header_win.addnstr(
                plotting_status_msg(plotting_active, plotting_status), linecap)
        header_win.addnstr(' <A>rchival: ', linecap, curses.A_BOLD)
        header_win.addnstr(
                archiving_status_msg(archiving_configured,
                    archiving_active, archiving_status), linecap)

        # Oneliner progress display
        header_win.addnstr(1, 0, 'Jobs (%d): ' % len(jobs), linecap)
        header_win.addnstr('[' + reporting.job_viz(jobs) + ']', linecap)

        # These are useful for debugging.
        # header_win.addnstr(' term size: (%d, %d)' % (n_rows, n_cols), linecap)
        # if pressed_key:
        #     header_win.addnstr(' (keypress %s)' % str(pressed_key), linecap)

        header_win.addnstr(2, 0, 'Prefixes:', linecap, curses.A_BOLD)
        header_win.addnstr(' tmp=', linecap, curses.A_BOLD)
        header_win.addnstr(tmp_prefix, linecap)
        header_win.addnstr(' dst=', linecap, curses.A_BOLD)
        header_win.addnstr(dst_prefix, linecap)
        if archiving_configured:
            header_win.addnstr(' archive=', linecap, curses.A_BOLD)
            header_win.addnstr(arch_prefix, linecap)
            header_win.addnstr(' (remote)', linecap)

        # Jobs
        jobs_win.addstr(0, 0, reporting.status_report(jobs, n_cols, jobs_height,
                tmp_prefix, dst_prefix))
        jobs_win.chgat(0, 0, curses.A_REVERSE)

        # Dirs.  Collect reports as strings, then lay out.
        n_tmpdirs = len(dir_cfg['tmp'])
        n_tmpdirs_half = int(n_tmpdirs / 2)
        tmp_report_1 = reporting.tmp_dir_report(
            jobs, dir_cfg['tmp'], sched_cfg, n_cols, 0, n_tmpdirs_half, tmp_prefix)
        tmp_report_2 = reporting.tmp_dir_report(
            jobs, dir_cfg['tmp'], sched_cfg, n_cols, n_tmpdirs_half, n_tmpdirs, tmp_prefix)

        dst_report = reporting.dst_dir_report(
            jobs, dir_cfg['dst'], n_cols, dst_prefix)

        if archiving_configured:
            arch_report = reporting.arch_dir_report(
                archive.get_archdir_freebytes(dir_cfg['archive']), n_cols, arch_prefix)
            if not arch_report:
                arch_report = ''
        else:
            arch_report = ''

        tmp_h = max(len(tmp_report_1.splitlines()),
                    len(tmp_report_2.splitlines()))
        tmp_w = len(max(tmp_report_1.splitlines() +
                        tmp_report_2.splitlines(), key=len)) + 1
        dst_h = len(dst_report.splitlines())
        dst_w = len(max(dst_report.splitlines(), key=len)) + 1
        arch_h = len(arch_report.splitlines()) + 1
        arch_w = n_cols

        tmpwin_12_gutter = 3
        tmpwin_dstwin_gutter = 6

        maxtd_h = max([tmp_h, dst_h])

        tmpwin_1 = curses.newwin(
                tmp_h, tmp_w,
                dirs_pos + int((maxtd_h - tmp_h) / 2), 0)
        tmpwin_1.addstr(tmp_report_1)

        tmpwin_2 = curses.newwin(
                tmp_h, tmp_w,
                dirs_pos + int((maxtd_h - tmp_h) / 2),
                tmp_w + tmpwin_12_gutter)
        tmpwin_2.addstr(tmp_report_2)

        tmpwin_1.chgat(0, 0, curses.A_REVERSE)
        tmpwin_2.chgat(0, 0, curses.A_REVERSE)

        dstwin = curses.newwin(
                dst_h, dst_w,
                dirs_pos + int((maxtd_h - dst_h) / 2),
                2 * tmp_w + tmpwin_12_gutter + tmpwin_dstwin_gutter)
        dstwin.addstr(dst_report)
        dstwin.chgat(0, 0, curses.A_REVERSE)

        #archwin = curses.newwin(arch_h, arch_w, dirs_pos + maxtd_h, 0)
        #archwin.addstr(0, 0, 'Archive dirs free space', curses.A_REVERSE)
        #archwin.addstr(1, 0, arch_report)

        # Log.  Could use a pad here instead of managing scrolling ourselves, but
        # this seems easier.
        log_win.addnstr(0, 0, ('Log: %d (<up>/<down> to scroll)\n' % log.get_cur_pos()),
                linecap, curses.A_REVERSE)
        for i, logline in enumerate(log.cur_slice(logscreen_height - 1)):
            log_win.addnstr(i + 1, 0, logline, linecap)

        stdscr.noutrefresh()
        header_win.noutrefresh()
        jobs_win.noutrefresh()
        tmpwin_1.noutrefresh()
        tmpwin_2.noutrefresh()
        dstwin.noutrefresh()
        #archwin.noutrefresh()
        log_win.noutrefresh()
        curses.doupdate()

        key = stdscr.getch()
        if key == curses.KEY_UP:
            log.shift_slice(-1)
            pressed_key = 'up'
        elif key == curses.KEY_DOWN:
            log.shift_slice(1)
            pressed_key = 'dwn'
        elif key == curses.KEY_END:
            log.shift_slice_to_end()
            pressed_key = 'end'
        elif key == ord('p'):
            plotting_active = not plotting_active
            pressed_key = 'p'
        elif key == ord('a'):
            archiving_active = not archiving_active
            pressed_key = 'a'
        elif key == ord('q'):
            break
        else:
            pressed_key = key


def run_interactive():
    locale.setlocale(locale.LC_ALL, '')
    code = locale.getpreferredencoding()
    # Then use code as the encoding for str.encode() calls.

    curses.wrapper(curses_main)

--------------------------------------------------------------------------------
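One possible answer to the resize TODO in curses_main() above: shutil.get_terminal_size() (already used by plotman.py) works on Windows, unlike the stty approach suggested in the comment. A minimal sketch, not wired in:

    import shutil

    (n_cols, n_rows) = shutil.get_terminal_size()  # columns first, then lines
    # ...then recreate the subwindows with the new n_rows/n_cols as above.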
/job.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3

# TODO do we use all these?
from datetime import datetime
from enum import Enum, auto
from subprocess import call
import argparse

import contextlib
import logging
import os
import re
import threading
import time
import psutil
import random
import sys

class UnmatchedJobError(Exception):
    pass

def job_phases_for_tmpdir(d, all_jobs):
    '''Return phase 2-tuples for jobs running on tmpdir d'''
    return sorted([j.progress() for j in all_jobs if j.tmpdir == d])

def job_phases_for_dstdir(d, all_jobs):
    '''Return phase 2-tuples for jobs outputting to dstdir d'''
    return sorted([j.progress() for j in all_jobs if j.dstdir == d])

def is_plotting_cmdline(cmdline):
    return (
        len(cmdline) >= 4
        #and 'python' in cmdline[0]
        and 'chia2.exe' in cmdline[0]
        and 'plots' == cmdline[1]
        and 'create' == cmdline[2]
    )

# TODO: be more principled and explicit about what we cache vs. what we look up
# dynamically from the logfile
class Job:
    'Represents a plotter job'

    # These are constants, not updated during a run.
    k = 0
    r = 0
    u = 0
    b = 0
    n = 0  # probably not used
    tmpdir = ''
    tmp2dir = ''
    dstdir = ''
    logfile = ''
    jobfile = ''
    job_id = 0
    plot_id = '--------'
    proc = None   # will get a psutil.Process

    # These are dynamic, cached, and need to be updated periodically
    phase = (None, None)   # Phase/subphase

    def get_running_jobs(logroot, cached_jobs=()):
        '''Return a list of running plot jobs.  If a cache of preexisting jobs is
           provided, reuse those previous jobs without updating their information.
           Always look for new jobs not already in the cache.'''
        jobs = []
        cached_jobs_by_pid = { j.proc.pid: j for j in cached_jobs }

        for proc in psutil.process_iter(['pid', 'cmdline']):
            try:
                # Ignore processes which most likely have terminated between the time of
                # iteration and data access.
                with contextlib.suppress(psutil.NoSuchProcess):
                    if is_plotting_cmdline(proc.cmdline()):
                        if proc.pid in cached_jobs_by_pid.keys():
                            jobs.append(cached_jobs_by_pid[proc.pid])  # Copy from cache
                        else:
                            with contextlib.suppress(UnmatchedJobError):
                                jobs.append(Job(proc, logroot))
            except (PermissionError, psutil.AccessDenied):
                # Permission error or access denied on the process; skip it.
                pass
        return jobs


    def __init__(self, proc, logroot):
        '''Initialize from an existing psutil.Process object.  Must know logroot
           in order to understand open files.'''
        self.proc = proc

        with self.proc.oneshot():
            # Parse command line args
            args = self.proc.cmdline()
            assert len(args) > 4
            assert 'chia2.exe' in args[0]
            assert 'plots' == args[1]
            assert 'create' == args[2]
            args_iter = iter(args[3:])
            for arg in args_iter:
                val = None if arg in ['-e'] else next(args_iter)
                if arg == '-k':
                    self.k = val
                elif arg == '-r':
                    self.r = val
                elif arg == '-b':
                    self.b = val
                elif arg == '-u':
                    self.u = val
                elif arg == '-t':
                    self.tmpdir = val
                elif arg == '-2':
                    self.tmp2dir = val
                elif arg == '-d':
                    self.dstdir = val
                elif arg == '-n':
                    self.n = val
                elif arg == '-e':
                    pass
                    # TODO: keep track of -e
                else:
                    print('Warning: unrecognized args: %s %s' % (arg, val))

            # Find logfile (whatever file is open under the log root).  The
            # file may be open more than once, e.g. for STDOUT and STDERR.
            for f in self.proc.open_files():
                if logroot in f.path:
                    if self.logfile:
                        assert self.logfile == f.path
                    else:
                        self.logfile = f.path
                    break

            # Initialize data that needs to be loaded from the logfile
            self.init_from_logfile()


    def init_from_logfile(self):
        '''Read plot ID and job start time from the logfile.  Raises
           UnmatchedJobError if the job has no logfile; if the log is not fully
           written yet, falls back to the logfile ctime for the start time.'''
        if not self.logfile:
            raise UnmatchedJobError()
        # Try reading for a while; it can take a while for the job to get started as it scans
        # existing plot dirs (especially if they are NFS).
        found_id = False
        found_log = False
        for attempt_number in range(3):
            with open(self.logfile, 'r') as f:
                for line in f:
                    m = re.match('^ID: ([0-9a-f]*)', line)
                    if m:
                        self.plot_id = m.group(1)
                        found_id = True
                    m = re.match(r'^Starting phase 1/4:.*\.\.\. (.*)', line)
                    if m:
                        # Mon Nov 2 08:39:53 2020
                        self.start_time = datetime.strptime(m.group(1), '%a %b %d %H:%M:%S %Y')
                        found_log = True
                        break  # Stop reading lines in file

            if found_id and found_log:
                break  # Stop trying
            else:
                time.sleep(1)  # Sleep and try again

        # If we couldn't find the line in the logfile, the job is probably just getting started
        # (and being slow about it).  In this case, use the last metadata change as the start time.
        # TODO: we never come back to this; e.g. plot_id may remain uninitialized.
        if not found_log:
            self.start_time = datetime.fromtimestamp(os.path.getctime(self.logfile))

        # Load things from logfile that are dynamic
        self.update_from_logfile()

    def update_from_logfile(self):
        self.set_phase_from_logfile()

    def set_phase_from_logfile(self):
        assert self.logfile

        # Map from phase number to subphase number reached in that phase.
        # Phase 1 subphases are <started>, table1, table2, ...
        # Phase 2 subphases are <started>, table7, table6, ...
        # Phase 3 subphases are <started>, tables1&2, tables2&3, ...
        # Phase 4 subphases are <started>
        phase_subphases = {}

        with open(self.logfile, 'r') as f:
            for line in f:
                # "Starting phase 1/4: Forward Propagation into tmp files... Sat Oct 31 11:27:04 2020"
                m = re.match(r'^Starting phase (\d).*', line)
                if m:
                    phase = int(m.group(1))
                    phase_subphases[phase] = 0

                # Phase 1: "Computing table 2"
                m = re.match(r'^Computing table (\d).*', line)
                if m:
                    phase_subphases[1] = max(phase_subphases[1], int(m.group(1)))

                # Phase 2: "Backpropagating on table 2"
                m = re.match(r'^Backpropagating on table (\d).*', line)
                if m:
                    phase_subphases[2] = max(phase_subphases[2], 7 - int(m.group(1)))

                # Phase 3: "Compressing tables 4 and 5"
                m = re.match(r'^Compressing tables (\d) and (\d).*', line)
                if m:
                    phase_subphases[3] = max(phase_subphases[3], int(m.group(1)))

                # TODO also collect timing info:

                # "Time for phase 1 = 22796.7 seconds. CPU (98%) Tue Sep 29 17:57:19 2020"
                # for phase in ['1', '2', '3', '4']:
                #     m = re.match(r'^Time for phase ' + phase + ' = (\d+.\d+) seconds..*', line)
                #     data.setdefault....

                # "Total time = 49487.1 seconds. CPU (97.26%) Wed Sep 30 01:22:10 2020"
                # m = re.match(r'^Total time = (\d+.\d+) seconds.*', line)
                # if m:
                #     data.setdefault(key, {}).setdefault('total time', []).append(float(m.group(1)))

        if phase_subphases:
            phase = max(phase_subphases.keys())
            self.phase = (phase, phase_subphases[phase])
        else:
            self.phase = (0, 0)

    def progress(self):
        '''Return a 2-tuple with the job phase and subphase (by reading the logfile)'''
        return self.phase

    def plot_id_prefix(self):
        return self.plot_id[:8]

    # TODO: make this more useful and complete, and/or make it configurable
    def status_str_long(self):
        return '{plot_id}\nk={k} r={r} b={b} u={u}\npid:{pid}\ntmp:{tmp}\ntmp2:{tmp2}\ndst:{dst}\nlogfile:{logfile}'.format(
            plot_id = self.plot_id,
            k = self.k,
            r = self.r,
            b = self.b,
            u = self.u,
            pid = self.proc.pid,
            tmp = self.tmpdir,
            tmp2 = self.tmp2dir,
            dst = self.dstdir,
            logfile = self.logfile
            )

    def get_mem_usage(self):
        return self.proc.memory_info().vms  # Total, inc swapped

    def get_tmp_usage(self):
        total_bytes = 0
        with os.scandir(self.tmpdir) as it:
            for entry in it:
                if self.plot_id in entry.name:
                    try:
                        total_bytes += entry.stat().st_size
                    except FileNotFoundError:
                        # The file might disappear; this being an estimate we don't care
                        pass
        return total_bytes

    def get_run_status(self):
        '''Running, suspended, etc.'''
        status = self.proc.status()
        if status == psutil.STATUS_RUNNING:
            return 'RUN'
        elif status == psutil.STATUS_SLEEPING:
            return 'SLP'
        elif status == psutil.STATUS_DISK_SLEEP:
            return 'DSK'
        elif status == psutil.STATUS_STOPPED:
            return 'STP'
        else:
            return self.proc.status()

    def get_time_wall(self):
        return int((datetime.now() - self.start_time).total_seconds())

    def get_time_user(self):
        return int(self.proc.cpu_times().user)

    def get_time_sys(self):
        return int(self.proc.cpu_times().system)

    def get_time_iowait(self):
        # psutil does not report iowait on Windows, so just report zero.
        return 0
        # return int(self.proc.cpu_times().iowait)

    def suspend(self, reason=''):
        self.proc.suspend()
        self.status_note = reason

    def resume(self):
        self.proc.resume()

    def get_temp_files(self):
        temp_files = []
        for f in self.proc.open_files():
            if self.tmpdir in f.path or self.tmp2dir in f.path or self.dstdir in f.path:
                temp_files.append(f.path)
        return temp_files

    def cancel(self):
        'Cancel an already running job'
        # We typically suspend the job as the first action in killing it, so it
        # doesn't create more tmp files during death.  However, terminate() won't
        # complete if the job is suspended, so we also need to resume it.
        # TODO: check that this is best practice for killing a job.
        self.proc.resume()
        self.proc.terminate()

    def check_status(self, expected_status):
        if self.get_run_status() == expected_status:
            return 1
        else:
            print('Expected status %s, actual %s' % (expected_status, self.get_run_status()))
            return 0

--------------------------------------------------------------------------------
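Note that phases are plain (major, minor) tuples throughout, so ordinary Python tuple ordering drives every comparison the scheduler and archiver make. A quick illustration:

    assert (2, 1) < (3, 6) < (3, 7) < (4, 0)
    assert sorted([(3, 6), (1, 2), (2, 1)])[0] == (1, 2)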
/manager.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3

from datetime import datetime

import logging
import operator
import os
import re
import threading
import time
import psutil
import random
import readline  # For nice CLI
import subprocess
import sys
from pathlib import Path, PureWindowsPath

# Plotman libraries
import job
import plot_util
import archive  # for get_archdir_freebytes().  TODO: move to avoid import loop

# Constants
MIN = 60    # Seconds
HR = 3600   # Seconds

MAX_AGE = 1000_000_000  # Arbitrary large number of seconds


def dstdirs_to_furthest_phase(all_jobs):
    '''Return a map from dst dir to a phase tuple for the most progressed job
       that is emitting to that dst dir.'''
    result = {}
    for j in all_jobs:
        if not j.dstdir in result.keys() or result[j.dstdir] < j.progress():
            result[j.dstdir] = j.progress()
    return result

def dstdirs_to_youngest_phase(all_jobs):
    '''Return a map from dst dir to a phase tuple for the least progressed job
       that is emitting to that dst dir.'''
    result = {}
    for j in all_jobs:
        if not j.dstdir in result.keys() or result[j.dstdir] > j.progress():
            result[j.dstdir] = j.progress()
    return result

def phases_permit_new_job(phases, sched_cfg):
    '''Scheduling logic: return True if it's OK to start a new job on a tmp dir
       with existing jobs in the provided phases.'''
    if len(phases) == 0:
        return True

    milestone_1 = ( sched_cfg['tmpdir_stagger_phase_major'],
                    sched_cfg['tmpdir_stagger_phase_minor'] )
    # milestone_2 = (4, 0)

    if len([p for p in phases if p < milestone_1]) > 0:
        return False

    # if len([p for p in phases if milestone_1 <= p and p < milestone_2]) > 1:
    #     return False

    # No more than tmpdir_max_jobs jobs total on the tmpdir
    if len(phases) >= sched_cfg['tmpdir_max_jobs']:
        return False

    return True

def maybe_start_new_plot(dir_cfg, sched_cfg, plotting_cfg):
    '''Returns (True, logmsg) if a plot job was started, or (False, wait_reason)
       if not.'''
    jobs = job.Job.get_running_jobs(dir_cfg['log'])

    wait_reason = None  # If we don't start a job this iteration, this says why.

    youngest_job_age = min(jobs, key=job.Job.get_time_wall).get_time_wall() if jobs else MAX_AGE
    global_stagger = int(sched_cfg['global_stagger_m'] * MIN)
    if (youngest_job_age < global_stagger):
        wait_reason = 'stagger (%ds/%ds)' % (
                youngest_job_age, global_stagger)
    else:
        tmp_to_all_phases = [ (d, job.job_phases_for_tmpdir(d, jobs))
                for d in dir_cfg['tmp'] ]
        eligible = [ (d, phases) for (d, phases) in tmp_to_all_phases
                if phases_permit_new_job(phases, sched_cfg) ]
        rankable = [ (d, phases[0]) if phases else (d, (999, 999))
                for (d, phases) in eligible ]

        if not eligible:
            wait_reason = 'no eligible tempdirs'
        else:
            # Plot to the most-progressed (i.e. oldest) eligible tmpdir.
            tmpdir = max(rankable, key=operator.itemgetter(1))[0]

            # Select the dst dir least recently selected
            dir2ph = dstdirs_to_youngest_phase(jobs)
            unused_dirs = [d for d in dir_cfg['dst'] if d not in dir2ph.keys()]
            dstdir = ''
            if unused_dirs:
                dstdir = random.choice(unused_dirs)
            else:
                dstdir = max(dir2ph, key=dir2ph.get)

            logpath = Path(dir_cfg['log'])
            logfile = PureWindowsPath(logpath / datetime.now().strftime('%Y-%m-%d-%H.%M.%S.log'))
            #logfile = os.path.join(Path(dir_cfg['log']),)
            print(logfile)

            # TODO: read the chia2.exe location from config.yaml instead of
            # hardcoding it here (see README).
            plot_args = [r'C:\Users\Wofl\AppData\Local\Chia-Blockchain\app-1.0.3\resources\app.asar.unpacked\daemon\chia2.exe',
                    'plots', 'create',
                    '-k', str(plotting_cfg['k']),
                    '-r', str(plotting_cfg['n_threads']),
                    '-u', str(plotting_cfg['n_buckets']),
                    '-b', str(plotting_cfg['job_buffer']),
                    '-t', tmpdir,
                    '-d', dstdir ]
            if 'e' in plotting_cfg and plotting_cfg['e']:
                plot_args.append('-e')
            if 'tmp2' in dir_cfg:
                plot_args.append('-2')
                plot_args.append(dir_cfg['tmp2'])

            # logfile = repr(logfile)
            logmsg = ('Starting plot job: %s ; logging to %s' % (' '.join(plot_args), logfile))

            #print(logfile)
            #print(logmsg)
            # start_new_session to make the job independent of this controlling tty.
            p = subprocess.Popen(plot_args,
                    stdout=open(logfile, 'w'),
                    stderr=subprocess.STDOUT,
                    start_new_session=True)
            psutil.Process(p.pid)  #.nice(ABOVE_NORMAL_PRIORITY_CLASS)
            # print(psutil.Process(p.pid).cmdline())
            # x = psutil.Process(p.pid)
            # x.nice(15)
            return (True, logmsg)

    return (False, wait_reason)

def select_jobs_by_partial_id(jobs, partial_id):
    selected = []
    for j in jobs:
        if j.plot_id.startswith(partial_id):
            selected.append(j)
    return selected

--------------------------------------------------------------------------------
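A small illustration of the tmpdir staggering rule in phases_permit_new_job() above, using made-up phases and a hypothetical config that allows 3 jobs per tmpdir:

    import manager

    sched_cfg = {'tmpdir_stagger_phase_major': 2,
                 'tmpdir_stagger_phase_minor': 1,
                 'tmpdir_max_jobs': 3}
    # A job still at phase 1:5 blocks the tmpdir...
    assert not manager.phases_permit_new_job([(1, 5), (3, 2)], sched_cfg)
    # ...but once every job has reached 2:1, a new one may start.
    assert manager.phases_permit_new_job([(2, 1), (3, 2)], sched_cfg)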
/plot_util.py:
--------------------------------------------------------------------------------
import math
import os
import re

import shutil

GB = 1_000_000_000

def df_b2(d):
    'Return free space for directory (in bytes)'
    # os.statvfs is POSIX-only, so this does not work on Windows; kept for reference.
    stat = os.statvfs(d)
    return stat.f_frsize * stat.f_bfree

def df_b(d):
    'Return free space for directory (in bytes)'
    stat = shutil.disk_usage(d)
    return stat[2]  # the "free" field


def get_k32_plotsize():
    return 108 * GB

def human_format(num, precision):
    magnitude = 0
    while abs(num) >= 1000:
        magnitude += 1
        num /= 1000.0
    return (('%.' + str(precision) + 'f%s') %
            (num, ['', 'K', 'M', 'G', 'T', 'P'][magnitude]))

def time_format(sec):
    if sec < 60:
        return '%ds' % sec
    else:
        return '%d:%02d' % (int(sec / 3600), int((sec % 3600) / 60))

def tmpdir_phases_str(tmpdir_phases_pair):
    tmpdir = tmpdir_phases_pair[0]
    phases = tmpdir_phases_pair[1]
    phase_str = ', '.join(['%d:%d' % ph_subph for ph_subph in sorted(phases)])
    return ('%s (%s)' % (tmpdir, phase_str))

def split_path_prefix(items):
    if not items:
        return ('', [])

    prefix = ''  # os.path.commonpath(items) -- commonpath doesn't work for us on Windows
    if prefix == '/':
        return ('', items)
    else:
        remainders = [ os.path.relpath(i, prefix) for i in items ]
        return (prefix, remainders)

def list_k32_plots(d):
    'List completed k32 plots in a directory (not recursive)'
    plots = []
    for plot in os.listdir(d):
        if re.match(r'^plot-k32-.*plot$', plot):
            plot = os.path.join(d, plot)
            try:
                if os.stat(plot).st_size > (0.95 * get_k32_plotsize()):
                    plots.append(plot)
            except Exception:
                pass  # file may have disappeared; skip it

    return plots

def column_wrap(items, n_cols, filler=None):
    '''Take items, distribute among n_cols columns, and return a set
       of rows containing the slices of those columns.'''
    rows = []
    n_rows = math.ceil(len(items) / n_cols)
    for row in range(n_rows):
        row_items = items[row : : n_rows]
        # Pad and truncate
        rows.append( (row_items + ([filler] * n_cols))[:n_cols] )
    return rows

--------------------------------------------------------------------------------
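A few examples of the formatting helpers above (input values taken from the sample log lines quoted in analyzer.py):

    import plot_util

    plot_util.human_format(22796.7, 1)             # -> '22.8K'
    plot_util.human_format(108 * plot_util.GB, 0)  # -> '108G'
    plot_util.time_format(49487)                   # -> '13:44' (hours:minutes)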
/plotman.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3

from datetime import datetime
from subprocess import call

import argparse
import os
import re
import threading
import random
import readline  # For nice CLI
import sys
import time
import yaml
import shutil

# Plotman libraries
from job import Job
import analyzer
import archive
import interactive
import manager
import plot_util
import reporting

class PlotmanArgParser:
    def add_idprefix_arg(self, subparser):
        subparser.add_argument(
                'idprefix',
                type=str,
                nargs='+',
                help='disambiguating prefix of plot ID')

    def parse_args(self):
        parser = argparse.ArgumentParser(description='Chia plotting manager.')
        sp = parser.add_subparsers(dest='cmd')

        p_status = sp.add_parser('status', help='show current plotting status')

        p_dirs = sp.add_parser('dirs', help='show directories info')

        p_interactive = sp.add_parser('interactive', help='run interactive control/monitoring mode')

        p_dst_sch = sp.add_parser('dsched', help='print destination dir schedule')

        p_plot = sp.add_parser('plot', help='run plotting loop')

        p_archive = sp.add_parser('archive',
                help='move completed plots to farming location')

        p_details = sp.add_parser('details', help='show details for job')
        self.add_idprefix_arg(p_details)

        p_files = sp.add_parser('files', help='show temp files associated with job')
        self.add_idprefix_arg(p_files)

        p_kill = sp.add_parser('kill', help='kill job (and cleanup temp files)')
        self.add_idprefix_arg(p_kill)

        p_suspend = sp.add_parser('suspend', help='suspend job')
        self.add_idprefix_arg(p_suspend)

        p_resume = sp.add_parser('resume', help='resume suspended job')
        self.add_idprefix_arg(p_resume)

        p_analyze = sp.add_parser('analyze',
                help='analyze timing stats of completed jobs')
        p_analyze.add_argument('logfile', type=str, nargs='+',
                help='logfile(s) to analyze')

        args = parser.parse_args()
        return args


if __name__ == "__main__":
    random.seed()

    pm_parser = PlotmanArgParser()
    args = pm_parser.parse_args()

    with open('config.yaml', 'r') as ymlfile:
        cfg = yaml.load(ymlfile, Loader=yaml.FullLoader)
    dir_cfg = cfg['directories']
    sched_cfg = cfg['scheduling']
    plotting_cfg = cfg['plotting']

    #
    # Stay alive, spawning plot jobs
    #
    if args.cmd == 'plot':
        print('...starting plot loop')
        while True:
            # maybe_start_new_plot() returns a (started, msg) tuple.
            (started, wait_reason) = manager.maybe_start_new_plot(dir_cfg, sched_cfg, plotting_cfg)

            # TODO: report this via a channel that can be polled on demand, so we don't spam the console
            sleep_s = int(sched_cfg['polling_time_s'])
            if not started:
                print('...sleeping %d s: %s' % (sleep_s, wait_reason))

            time.sleep(sleep_s)

    #
    # Analysis of completed jobs
    #
    elif args.cmd == 'analyze':
        analyzer = analyzer.LogAnalyzer()
        analyzer.analyze(args.logfile)

    else:
        # print('...scanning process tables')
        jobs = Job.get_running_jobs(dir_cfg['log'])

        # Status report
        if args.cmd == 'status':
            (columns, rows) = shutil.get_terminal_size()
            print(reporting.status_report(jobs, int(columns)))

        # Directories report
        elif args.cmd == 'dirs':
            (columns, rows) = shutil.get_terminal_size()
            print(reporting.dirs_report(jobs, dir_cfg, sched_cfg, int(columns)))

        elif args.cmd == 'interactive':
            interactive.run_interactive()

        # Start running archival
        elif args.cmd == 'archive':
            print('...starting archive loop')
            firstit = True
            while True:
                if not firstit:
                    print('Sleeping 60s until next iteration...')
                    time.sleep(60)
                jobs = Job.get_running_jobs(dir_cfg['log'])
                firstit = False
                archive.archive(dir_cfg, jobs)

        # Debugging: show the destination drive usage schedule
        elif args.cmd == 'dsched':
            dstdirs = dir_cfg['dst']
            for (d, ph) in manager.dstdirs_to_furthest_phase(jobs).items():
                print('  %s : %s' % (d, str(ph)))

        #
        # Job control commands
        #
        elif args.cmd in [ 'details', 'files', 'kill', 'suspend', 'resume' ]:
            print(args)

            selected = []

            # TODO: clean up treatment of wildcard
            if args.idprefix[0] == 'all':
                selected = jobs
            else:
                # TODO: allow multiple idprefixes, not just take the first
                selected = manager.select_jobs_by_partial_id(jobs, args.idprefix[0])
                if (len(selected) == 0):
                    print('Error: %s matched no jobs.' % args.idprefix[0])
                elif len(selected) > 1:
                    print('Error: "%s" matched multiple jobs:' % args.idprefix[0])
                    for j in selected:
                        print('  %s' % j.plot_id)
                    selected = []

            for job in selected:
                if args.cmd == 'details':
                    print(job.status_str_long())

                elif args.cmd == 'files':
                    temp_files = job.get_temp_files()
                    for f in temp_files:
                        print('  %s' % f)

                elif args.cmd == 'kill':
                    # First suspend so job doesn't create new files
                    print('Pausing PID %d, plot id %s' % (job.proc.pid, job.plot_id))
                    job.suspend()

                    temp_files = job.get_temp_files()
                    print('Will kill pid %d, plot id %s' % (job.proc.pid, job.plot_id))
                    print('Will delete %d temp files' % len(temp_files))
                    conf = input('Are you sure? ("y" to confirm): ')
                    if (conf != 'y'):
                        print('Canceled.  If you wish to resume the job, do so manually.')
                    else:
                        print('killing...')
                        job.cancel()
                        print('cleaning up temp files...')
                        for f in temp_files:
                            os.remove(f)

                elif args.cmd == 'suspend':
                    print('Suspending ' + job.plot_id)
                    job.suspend()
                elif args.cmd == 'resume':
                    print('Resuming ' + job.plot_id)
                    job.resume()

--------------------------------------------------------------------------------
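Besides `interactive`, the subcommands defined above can be run directly from PowerShell, for example (the log filename and the plot-ID prefix below are hypothetical examples):

    python plotman.py status
    python plotman.py dirs
    python plotman.py analyze C:\chia\2021-04-05-12.00.00.log
    python plotman.py kill 7a3b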
/reporting.py:
--------------------------------------------------------------------------------
import texttable as tt  # from somewhere?
import os
import psutil

import archive
import job
import manager
import math
import plot_util

def abbr_path(path, putative_prefix):
    if putative_prefix and path.startswith(putative_prefix):
        return os.path.relpath(path, putative_prefix)
    else:
        return path

def phases_str(phases, max_num=None):
    '''Take a list of phase-subphase pairs and return them as a compact string'''
    if not max_num or len(phases) <= max_num:
        return ' '.join(['%d:%d' % pair for pair in phases])
    else:
        n_first = math.floor(max_num / 2)
        n_last = max_num - n_first
        n_elided = len(phases) - (n_first + n_last)
        first = ' '.join(['%d:%d' % pair for pair in phases[:n_first]])
        elided = " [+%d] " % n_elided
        last = ' '.join(['%d:%d' % pair for pair in phases[n_first + n_elided:]])
        return first + elided + last

def n_at_ph(jobs, ph):
    return sum([1 for j in jobs if j.progress() == ph])

def n_to_char(n):
    n_to_char_map = dict(enumerate(" .:;!"))

    if n < 0:
        return 'X'  # Should never be negative
    elif n >= len(n_to_char_map):
        n = len(n_to_char_map) - 1

    return n_to_char_map[n]

def job_viz(jobs):
    # TODO: rewrite this in a way that ensures we count every job
    # even if the reported phases don't line up with expectations.
    result = ''
    result += '1'
    for i in range(0, 8):
        result += n_to_char(n_at_ph(jobs, (1, i)))
    result += '2'
    for i in range(0, 8):
        result += n_to_char(n_at_ph(jobs, (2, i)))
    result += '3'
    for i in range(0, 7):
        result += n_to_char(n_at_ph(jobs, (3, i)))
    result += '4'
    result += n_to_char(n_at_ph(jobs, (4, 0)))
    return result


def status_report(jobs, width, height=None, tmp_prefix='', dst_prefix=''):
    '''height, if provided, will limit the number of rows in the table,
       showing first and last rows, row numbers and an ellipsis in the middle.'''
    abbreviate_jobs_list = False
    n_begin_rows = 0
    n_end_rows = 0
    if height and height < len(jobs) + 1:  # One row for header
        abbreviate_jobs_list = True

    if abbreviate_jobs_list:
        n_rows = height - 2  # One for header, one for ellipsis
        n_begin_rows = int(n_rows / 2)
        n_end_rows = n_rows - n_begin_rows

    tab = tt.Texttable()
    headings = ['plot id', 'k', 'tmp', 'dst', 'wall', 'phase', 'tmp',
                'pid', 'stat', 'mem', 'user', 'sys', 'io']
    if height:
        headings.insert(0, '#')
    tab.header(headings)
    tab.set_cols_dtype('t' * len(headings))
    tab.set_cols_align('r' * len(headings))
    tab.set_header_align('r' * len(headings))
    for i, j in enumerate(sorted(jobs, key=job.Job.get_time_wall)):
        # Ellipsis row (12 filler cells; the '#' cell is inserted below)
        if abbreviate_jobs_list and i == n_begin_rows:
            row = ['...'] + ([''] * 12)
        # Omitted row
        elif abbreviate_jobs_list and i > n_begin_rows and i < (len(jobs) - n_end_rows):
            continue
        # Regular row
        else:
            try:
                row = [j.plot_id[:8] + '...',
                       j.k,
                       abbr_path(j.tmpdir, tmp_prefix),
                       abbr_path(j.dstdir, dst_prefix),
                       plot_util.time_format(j.get_time_wall()),
                       '%d:%d' % j.progress(),
                       plot_util.human_format(j.get_tmp_usage(), 0),
                       j.proc.pid,
                       j.get_run_status(),
                       plot_util.human_format(j.get_mem_usage(), 1),
                       plot_util.time_format(j.get_time_user()),
                       plot_util.time_format(j.get_time_sys()),
                       plot_util.time_format(j.get_time_iowait())
                       ]
            except psutil.NoSuchProcess:
                # In case the job has disappeared
                row = [j.plot_id[:8] + '...'] + (['--'] * 12)

        if height:
            row.insert(0, '%3d' % i)

        tab.add_row(row)

    tab.set_max_width(width)
    tab.set_deco(0)  # No borders
    # return ('tmp dir prefix: %s ; dst dir prefix: %s\n' % (tmp_prefix, dst_prefix))
    return tab.draw()

def tmp_dir_report(jobs, tmpdirs, sched_cfg, width, start_row=None, end_row=None, prefix=''):
    '''start_row, end_row let you split the table up if you want'''
    tab = tt.Texttable()
    headings = ['tmp', 'ready', 'phases']
    tab.header(headings)
    tab.set_cols_dtype('t' * len(headings))
    tab.set_cols_align('r' * (len(headings) - 1) + 'l')
    for i, d in enumerate(sorted(tmpdirs)):
        if (start_row and i < start_row) or (end_row and i >= end_row):
            continue
        phases = sorted(job.job_phases_for_tmpdir(d, jobs))
        ready = manager.phases_permit_new_job(phases, sched_cfg)
        row = [abbr_path(d, prefix), 'OK' if ready else '--', phases_str(phases)]
        tab.add_row(row)

    tab.set_max_width(width)
    tab.set_deco(tt.Texttable.BORDER | tt.Texttable.HEADER)
    tab.set_deco(0)  # No borders
    return tab.draw()

def dst_dir_report(jobs, dstdirs, width, prefix=''):
    tab = tt.Texttable()
    dir2oldphase = manager.dstdirs_to_furthest_phase(jobs)
    dir2newphase = manager.dstdirs_to_youngest_phase(jobs)
    headings = ['dst', 'plots', 'GBfree', 'inbnd phases', 'pri']
    tab.header(headings)
    tab.set_cols_dtype('t' * len(headings))

    for d in sorted(dstdirs):
        # TODO: this logic is replicated in archive.py's priority computation;
        # maybe fix by moving more of the logic into directory.py
        eldest_ph = dir2oldphase.get(d, (0, 0))
        phases = job.job_phases_for_dstdir(d, jobs)

        dir_plots = plot_util.list_k32_plots(d)

        gb_free = int(plot_util.df_b(d) / plot_util.GB)

        n_plots = len(dir_plots)
        priority = archive.compute_priority(eldest_ph, gb_free, n_plots)
        row = [abbr_path(d, prefix), n_plots, gb_free,
               phases_str(phases, 5), priority]
        tab.add_row(row)
    tab.set_max_width(width)
    tab.set_deco(tt.Texttable.BORDER | tt.Texttable.HEADER)
    tab.set_deco(0)  # No borders
    return tab.draw()

def arch_dir_report(archdir_freebytes, width, prefix=''):
    cells = ['%s:%5dGB' % (abbr_path(d, prefix), int(int(space) / plot_util.GB))
             for (d, space) in sorted(archdir_freebytes.items())]
    if not cells:
        return ''

    n_columns = int(width / (len(max(cells, key=len)) + 3))
    tab = tt.Texttable()
    tab.set_max_width(width)
    for row in plot_util.column_wrap(cells, n_columns, filler=''):
        tab.add_row(row)
    tab.set_cols_align('r' * (n_columns))
    tab.set_deco(tt.Texttable.VLINES)
    return tab.draw()

# TODO: remove this
def dirs_report(jobs, dir_cfg, sched_cfg, width):
    tmpdirs = dir_cfg['tmp']
    dstdirs = dir_cfg['dst']
    report = (tmp_dir_report(jobs, tmpdirs, sched_cfg, width) + '\n' +
              dst_dir_report(jobs, dstdirs, width) + '\n')
    # The archive section is optional in config.yaml; skip it if absent.
    if 'archive' in dir_cfg:
        report += ('archive dirs free space:\n' +
                   arch_dir_report(archive.get_archdir_freebytes(dir_cfg['archive']), width) + '\n')
    return report

--------------------------------------------------------------------------------
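For reference, job_viz() above encodes the number of jobs at each phase:subphase as a single character via n_to_char(): zero jobs is a space, then '.', ':', ';', and '!' for four or more. A quick check:

    import reporting

    assert [reporting.n_to_char(n) for n in range(5)] == [' ', '.', ':', ';', '!']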