├── .gitignore
├── gvp.sh
├── gvp-client.sh
├── README.md
├── gvp-README.html
├── LICENSE
├── extract-gl-client-prof.py
└── extract-glvolprof.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 | 
26 | # PyInstaller
27 | #  Usually these files are written by a python script from a template
28 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 | 
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 | 
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .coverage.*
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | *,cover
45 | 
46 | # Translations
47 | *.mo
48 | *.pot
49 | 
50 | # Django stuff:
51 | *.log
52 | 
53 | # Sphinx documentation
54 | docs/_build/
55 | 
56 | # PyBuilder
57 | target/
58 | 


--------------------------------------------------------------------------------
/gvp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # gvp.sh - collect performance data from Gluster about a particular Gluster volume
 3 | # usage: 
 4 | # bash gvp.sh your-gluster-volume sample-count sample-interval output-file
 5 | # output-file is optional, defaults to gvp.log
 6 | #
 7 | # this version of the script puts the data in pbench format
 8 | # for HTML graph generation
 9 | #
10 | volume_name=$1
11 | sample_count=$2
12 | sample_interval=$3
13 | outfile=$4
14 | if [ "$sample_interval" = "" ]  ; then
15 |   echo "usage: gvp.sh your-gluster-volume sample-count sample-interval-sec [ output-file ] "
16 |   exit 1
17 | fi
18 | if [ -z "$outfile" ] ; then outfile=gvp.log ; fi
19 | 
20 | # start up profiling
21 | 
22 | gluster volume profile $volume_name start
23 | gluster volume profile $volume_name info > /tmp/past
24 | 
25 | # record a timestamp so we know when the data was collected
26 | # this lets us generate timestamps to put .csv output in pbench format
27 | 
28 | date +%Y-%m-%d-%H-%M > $outfile
29 | echo "$sample_interval $sample_count" >> $outfile
30 | 
31 | # generate samples  
32 | for min in `seq 1 $sample_count` ; do
33 |   sleep $sample_interval
34 |   gluster volume profile $volume_name info
35 | done >> $outfile 
36 | gluster volume profile $volume_name stop
37 | echo "output written to $outfile"
38 | 


--------------------------------------------------------------------------------
/gvp-client.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # gvp-client.sh - collect perf data from Gluster for client's usage 
 3 | # of Gluster volume from 1 mountpoint
 4 | #
 5 | # ASSUMPTION: "gluster volume profile your-volume start" has already happened
 6 | #
 7 | # usage: 
 8 | #  chmod u+x gvp-client.sh
 9 | #  ./gvp-client.sh your-gluster-volume your-client-mountpoint samples interval [ logfile ]
10 | #
11 | volume_name=$1
12 | mountpoint=$2
13 | sample_count=$3
14 | sample_interval=$4
15 | your_logfile=$5
16 | if [ "$sample_interval" = "" ]  ; then
17 |   echo "usage: gvp-client.sh your-gluster-volume your-client-mountpoint sample-count sample-interval-sec [ your-logfile ]"
18 |   exit 1
19 | fi
20 | 
21 | sample_cmd="setfattr -n trusted.io-stats-dump -v "
22 | 
23 | timestamp=`date +%Y-%m-%d-%H-%M`
24 | logfile=/var/tmp/gvp-client-${timestamp}.log
25 | if [ -n "$your_logfile" ] ; then
26 |   logfile="$your_logfile"
27 | fi
28 | 
29 | # make sure not polluted with previous data
30 | rm -f $logfile
31 | 
32 | # so next sample interval will be $sample_interval
33 | $sample_cmd /var/tmp/gvp.log $mountpoint
34 | 
35 | for min in `seq 1 $sample_count` ; do
36 |   sleep $sample_interval
37 |   rm -f /var/tmp/gvp.log
38 |   $sample_cmd /var/tmp/gvp.log $mountpoint
39 |   ( date ; cat /var/tmp/gvp.log ) >> $logfile
40 | done
41 | echo "output written to $logfile"
42 | 
43 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## Introduction
  2 | 
  3 | The extract-glvolprof.py program is meant to assist with visualizing the performance of
  4 | a gluster volume, using the gluster volume profile command.  One of key concepts in Gluster is the FOP (File Operation).  This is the unit of work passed from the application down through the Gluster translator stack until it reaches the storage device.  FOP Types for file creation, reading, writing, and many others are observable with these tools.
  5 | 
  6 | 
  7 | Statistic types produced per FOP type by these scripts include:
  8 | 
  9 | - call rates - for example, how many requests of different types are made per sec
 10 | - % latency - what fraction of FOP response time is consumed by different FOP types
 11 | - avg. latency - average FOP response time
 12 | - minimum latency
 13 | - maximum latency
 14 | 
 15 | Where all latencies are in units of microseconds.
 16 | 
 17 | The profiling tools consist of a collection and extraction script.  Typically you run the collection script to collect the profile data on a Gluster client or server, and then copy the file to your local system to run the extraction tool, which is just a python text processing script and should run anywhere.
 18 | 
 19 | To install, after cloning this repo, install the pbench-web-server RPM, which contains some javascript libraries that are used by the HTML file above and provide common code to read CSV files and produce graphs using the nvd3 library.  This code comes from the pbench project at:
 20 | 
 21 | https://github.com/distributed-system-analysis/pbench
 22 | 
 23 | These tools produce a subdirectory containing java-script graphs that can be viewed with a web browser, as well as .csv-format files that can be loaded into a spreadsheet, for example.  The CSV subdirectory contains a
 24 | symlink "static", which points to the "static" subdirectory deployed by the pbench-web-server RPM. After you install the pbench-web-server RPM, you can just create a softlink to the javascript, like this:
 25 | 
 26 |     # yum install pbench-web-server
 27 |     # ln -sv static /var/www/html/static
 28 | 
 29 | # server-side profiling
 30 | 
 31 | Server-side profiling allows you to see activity across the entire Gluster volume for a specified number of periodic samples.  It also allows you to see variation in stats between bricks, which can help you identify hotspots in your system where load is unevenly distributed.  Results include:
 32 | 
 33 | * per-volume MB/s read and written
 34 | * per-brick MB/s read and written
 35 | * per-volume per-FOP latency stats + call rates
 36 | * per-brick per-FOP (File OPeration) latency stats + call rate
 37 | 
 38 | It consists of:
 39 | 
 40 | * gvp.sh: a bash script which runs the above command periodically for a number
 41 | of samples, storing the results in a file.
 42 | * extract_glvolprof.py: a python script that takes that output file
 43 | and massages it into a form that can be used for visualization & analysis 
 44 | 
 45 | One component of this directory is an HTML file that can be viewed in a
 46 | browser. The other is a bunch of CSV files containing the
 47 | data. These files can also be used with a spreadsheet application if
 48 | desired, to produce graphs that way
 49 | 
 50 | Copy the scripts to some Gluster server in your cluster, (i.e. where you can run gluster volume profile command) and run the gvp.sh script. As an illustration, let's say we want to run it every 60 seconds and 10 iterations
 51 | (10 minutes of operation) - in practice, you might want to
 52 | do that periodically, perhaps in a cron job, in order to see the behavior
 53 | of the cluster over time.
 54 | 
 55 | \# ./gvp.sh [VOLNAME] 10 60
 56 | 
 57 | Then run the extract script
 58 | on that output file:
 59 | 
 60 | \# python extract-glvolprof.py gvp.log
 61 | 
 62 | The output (a bunch of CSV files and an HTML summary page) is placed in a subdirectory called gvp.log\_csvdir. 
 63 | 
 64 | To see the graphs, fire up a browser and point it to the URL that the extract script printed, pointing to gvp-graphs.html .
 65 | 
 66 | # client-side profiling
 67 | 
 68 | Client-side profiling allows you to see activity as close to the application as possible, at the top of the Gluster translator stack.  This is particularly useful for identifying response time problems for the application  related to Gluster activity.  For example, Gluster replication causes a single application WRITE FOP to be transformed into multiple WRITE FOPs at the bricks within the volume where the file data resides.  The response time for the application's WRITE request may be significantly different from the brick-level WRITE FOP latencies, because it incorporates the network response time and cannot complete before the brick-level WRITE FOPs complete.
 69 | 
 70 | Copy the scripts to some directory on your client (i.e. where mountpoint is), and run the gvp-client.sh script. As an illustration, let's say we want to run it every 10 seconds and 12 iterations
 71 | (roughly two minutes of operation) - in practice, you might want to
 72 | do that periodically, perhaps in a cron job, in order to see the behavior
 73 | of the cluster over time.
 74 | 
 75 | \# ./gvp-client.sh [VOLNAME] [MOUNTPOINT] [SAMPLE-AMOUNT] [SAMPLE-DURATION-IN-SEC]
 76 | \# ./gvp-client.sh vol1 /rhgs/client/vol1 12 10
 77 | 
 78 | By default, the output file is called <code>gvp-client-[Timestamp].log</code> and saved in /var/tmp/. Then run the extract script
 79 | on that output file:
 80 | 
 81 | \# python extract-gl-client-prof.py /var/tmp/gvp-client-[Timestamp].log
 82 | 
 83 | The output (a bunch of CSV files and an HTML summary page) is placed in
 84 | a subdirectory in /var/tmp named similar to the supplied log file.  
 85 | 
 86 | To see the graphs, fire up a browser and point it to the URL that the extract
 87 | script printed, pointing to gvp-client-graphs.html
 88 | 
 89 | # implementation notes
 90 | 
 91 | In order to take advantage of pbench javascript graphing, then column 1 in the .csv is always the timestamp in milliseconds when that sample took place. This can be disabled by defining the environment variable SKIP\_PBENCH\_GRAPHING.
 92 | 
 93 | # appendix: detailed list of FOPs
 94 | 
 95 | Here are all the file operation types that Gluster supports upstream as of November 2015.  Looking for developers to correct descriptions here.  The ones that are typically encountered are marked with the letter C:
 96 | 
 97 | * ACCESS - ?
 98 | * CREATE - C - create a file
 99 | * DISCARD - support for trim?
100 | * ENTRYLK - lock a directory given its pathname?
101 | * FALLOCATE - allocate space for file without actually writing to it
102 | * FENTRYLK - lock a file given its handle
103 | * FGETXATTR - C - get named extended attribute value for a file (handle)
104 | * FINODELK - C - lock a file/directory for write/read
105 | * FLUSH - ensure all written data is persistently stored
106 | * FREMOVEXATTR - remove a named extended attribute from a file handle
107 | * FSETATTR - set value of metadata field (which ones?) for a file (handle)
108 | * FSETXATTR - C - set value of a named extended attribute for a file handle
109 | * FSTAT - get standard metadata about a file given its file handle
110 | * FSYNC - C - ensure all written data for a file is persistently stored
111 | * FSYNCDIR - ensure all directory entries in directory are persistently stored
112 | * FTRUNCATE - set file size to specified value, deallocating data beyond this point
113 | * FXATTROP - C - used by AFR replication?
114 | * GETXATTR - get value of named extended attribute
115 | * INODELK - lock a directory for write or for read
116 | * LINK - create a hard link
117 | * LK - lock?
118 | * LOOKUP - C - lookup file within directory
119 | * MKDIR - C - create directory
120 | * MKNOD - create device special file
121 | * OPEN - C - open a file
122 | * OPENDIR - C - open a directory (in preparation for READDIR)
123 | * RCHECKSUM - ?
124 | * READ - C - read data from a file
125 | * READDIR - C - read directory entries from a directory
126 | * READDIRP - C - read directory entries with standard metadata for each file (readdirplus)
127 | * READLINK - get the pathname of a file that a symlink is pointing to
128 | * RELEASE - C - let go of file handle (similar to close)
129 | * RELEASEDIR - let go of directory handle (similar to close)
130 | * REMOVEXATTR - remove a named extended attribute from a pathname?
131 | * RENAME - C - rename a file
132 | * RMDIR - C - remove a directory (assumes it is already empty)
133 | * SETATTR - set field in standard file metadata for pathname
134 | * SETXATTR - C - set named extended attribute value for file given pathname
135 | * STAT - C - get standard metadata for file given pathname
136 | * STATFS - get metadata for the filesystem
137 | * SYMLINK - create a softlink to specified pathname
138 | * TRUNCATE - truncate file at pathname to specified size
139 | * UNLINK - C - delete file
140 | * WRITE - C - write data to file
141 | * XATTROP - ?
142 | * ZEROFILL - write zeroes to the file in specified offset range
143 | 


--------------------------------------------------------------------------------
/gvp-README.html:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  3 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  4 | <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
  5 | <head>
  6 | <!-- 2015-10-26 Mon 19:38 -->
  7 | <meta  http-equiv="Content-Type" content="text/html;charset=utf-8" />
  8 | <meta  name="viewport" content="width=device-width, initial-scale=1" />
  9 | <title></title>
 10 | <meta  name="generator" content="Org-mode" />
 11 | <style type="text/css">
 12 |  <!--/*--><![CDATA[/*><!--*/
 13 |   .title  { text-align: center;
 14 |              margin-bottom: .2em; }
 15 |   .subtitle { text-align: center;
 16 |               font-size: medium;
 17 |               font-weight: bold;
 18 |               margin-top:0; }
 19 |   .todo   { font-family: monospace; color: red; }
 20 |   .done   { font-family: monospace; color: green; }
 21 |   .priority { font-family: monospace; color: orange; }
 22 |   .tag    { background-color: #eee; font-family: monospace;
 23 |             padding: 2px; font-size: 80%; font-weight: normal; }
 24 |   .timestamp { color: #bebebe; }
 25 |   .timestamp-kwd { color: #5f9ea0; }
 26 |   .org-right  { margin-left: auto; margin-right: 0px;  text-align: right; }
 27 |   .org-left   { margin-left: 0px;  margin-right: auto; text-align: left; }
 28 |   .org-center { margin-left: auto; margin-right: auto; text-align: center; }
 29 |   .underline { text-decoration: underline; }
 30 |   #postamble p, #preamble p { font-size: 90%; margin: .2em; }
 31 |   p.verse { margin-left: 3%; }
 32 |   pre {
 33 |     border: 1px solid #ccc;
 34 |     box-shadow: 3px 3px 3px #eee;
 35 |     padding: 8pt;
 36 |     font-family: monospace;
 37 |     overflow: auto;
 38 |     margin: 1.2em;
 39 |   }
 40 |   pre.src {
 41 |     position: relative;
 42 |     overflow: visible;
 43 |     padding-top: 1.2em;
 44 |   }
 45 |   pre.src:before {
 46 |     display: none;
 47 |     position: absolute;
 48 |     background-color: white;
 49 |     top: -10px;
 50 |     right: 10px;
 51 |     padding: 3px;
 52 |     border: 1px solid black;
 53 |   }
 54 |   pre.src:hover:before { display: inline;}
 55 |   pre.src-sh:before    { content: 'sh'; }
 56 |   pre.src-bash:before  { content: 'sh'; }
 57 |   pre.src-emacs-lisp:before { content: 'Emacs Lisp'; }
 58 |   pre.src-R:before     { content: 'R'; }
 59 |   pre.src-perl:before  { content: 'Perl'; }
 60 |   pre.src-java:before  { content: 'Java'; }
 61 |   pre.src-sql:before   { content: 'SQL'; }
 62 | 
 63 |   table { border-collapse:collapse; }
 64 |   caption.t-above { caption-side: top; }
 65 |   caption.t-bottom { caption-side: bottom; }
 66 |   td, th { vertical-align:top;  }
 67 |   th.org-right  { text-align: center;  }
 68 |   th.org-left   { text-align: center;   }
 69 |   th.org-center { text-align: center; }
 70 |   td.org-right  { text-align: right;  }
 71 |   td.org-left   { text-align: left;   }
 72 |   td.org-center { text-align: center; }
 73 |   dt { font-weight: bold; }
 74 |   .footpara { display: inline; }
 75 |   .footdef  { margin-bottom: 1em; }
 76 |   .figure { padding: 1em; }
 77 |   .figure p { text-align: center; }
 78 |   .inlinetask {
 79 |     padding: 10px;
 80 |     border: 2px solid gray;
 81 |     margin: 10px;
 82 |     background: #ffffcc;
 83 |   }
 84 |   #org-div-home-and-up
 85 |    { text-align: right; font-size: 70%; white-space: nowrap; }
 86 |   textarea { overflow-x: auto; }
 87 |   .linenr { font-size: smaller }
 88 |   .code-highlighted { background-color: #ffff00; }
 89 |   .org-info-js_info-navigation { border-style: none; }
 90 |   #org-info-js_console-label
 91 |     { font-size: 10px; font-weight: bold; white-space: nowrap; }
 92 |   .org-info-js_search-highlight
 93 |     { background-color: #ffff00; color: #000000; font-weight: bold; }
 94 |   /*]]>*/-->
 95 | </style>
 96 | <script type="text/javascript">
 97 | /*
 98 | @licstart  The following is the entire license notice for the
 99 | JavaScript code in this tag.
100 | 
101 | Copyright (C) 2012-2013 Free Software Foundation, Inc.
102 | 
103 | The JavaScript code in this tag is free software: you can
104 | redistribute it and/or modify it under the terms of the GNU
105 | General Public License (GNU GPL) as published by the Free Software
106 | Foundation, either version 3 of the License, or (at your option)
107 | any later version.  The code is distributed WITHOUT ANY WARRANTY;
108 | without even the implied warranty of MERCHANTABILITY or FITNESS
109 | FOR A PARTICULAR PURPOSE.  See the GNU GPL for more details.
110 | 
111 | As additional permission under GNU GPL version 3 section 7, you
112 | may distribute non-source (e.g., minimized or compacted) forms of
113 | that code without the copy of the GNU GPL normally required by
114 | section 4, provided you include this license notice and a URL
115 | through which recipients can access the Corresponding Source.
116 | 
117 | 
118 | @licend  The above is the entire license notice
119 | for the JavaScript code in this tag.
120 | */
121 | <!--/*--><![CDATA[/*><!--*/
122 |  function CodeHighlightOn(elem, id)
123 |  {
124 |    var target = document.getElementById(id);
125 |    if(null != target) {
126 |      elem.cacheClassElem = elem.className;
127 |      elem.cacheClassTarget = target.className;
128 |      target.className = "code-highlighted";
129 |      elem.className   = "code-highlighted";
130 |    }
131 |  }
132 |  function CodeHighlightOff(elem, id)
133 |  {
134 |    var target = document.getElementById(id);
135 |    if(elem.cacheClassElem)
136 |      elem.className = elem.cacheClassElem;
137 |    if(elem.cacheClassTarget)
138 |      target.className = elem.cacheClassTarget;
139 |  }
140 | /*]]>*///-->
141 | </script>
142 | </head>
143 | <body>
144 | <div id="content">
145 | <div id="table-of-contents">
146 | <h2>Table of Contents</h2>
147 | <div id="text-table-of-contents">
148 | <ul>
149 | <li><a href="#orgheadline1">1. Introduction</a></li>
150 | <li><a href="#orgheadline6">2. Installation and use</a>
151 | <ul>
152 | <li><a href="#orgheadline2">2.1. Simplest installation possible - somewhere on the cluster</a></li>
153 | <li><a href="#orgheadline3">2.2. Simplest installation possible - on your own computer</a></li>
154 | <li><a href="#orgheadline4">2.3. More general installation (requires HTTP server)</a></li>
155 | <li><a href="#orgheadline5">2.4. Using ElasticSearch and Grafana - future direction</a></li>
156 | </ul>
157 | </li>
158 | </ul>
159 | </div>
160 | </div>
161 | 
162 | <div id="outline-container-orgheadline1" class="outline-2">
163 | <h2 id="orgheadline1"><span class="section-number-2">1</span> Introduction</h2>
164 | <div class="outline-text-2" id="text-1">
165 | <p>
166 | The extract-glvolprof.py program is meant to assist with visualizing the performance of
167 | a gluster volume, using the <code>gluster volume profile</code> command.
168 | </p>
169 | 
170 | <p>
171 | It consists of three pieces:
172 | </p>
173 | 
174 | <ul class="org-ul">
175 | <li>gvp.sh: a bash script which runs the above command periodically for a number
176 | of samples, storing the results in a file.</li>
177 | 
178 | <li>extract_glvolprof.py: a python script that takes that output file
179 | and massages it into a form that can be used for visualization. One
180 | component of this is an HTMl file that can be viewed in a
181 | browser. The other is a bunch of CSV files containing the
182 | data. These files can also be used with a spreadsheet application if
183 | desired, to produce graphs that way.</li>
184 | 
185 | <li>gvp-graph-javascript.tgz: a tarball containing some javascript
186 | libraries that are used by the HTML file above and provide common
187 | code to read CSV files and produce graphs using the nvd3 library.</li>
188 | </ul>
189 | </div>
190 | </div>
191 | 
192 | 
193 | <div id="outline-container-orgheadline6" class="outline-2">
194 | <h2 id="orgheadline6"><span class="section-number-2">2</span> Installation and use</h2>
195 | <div class="outline-text-2" id="text-2">
196 | </div><div id="outline-container-orgheadline2" class="outline-3">
197 | <h3 id="orgheadline2"><span class="section-number-3">2.1</span> Simplest installation possible - somewhere on the cluster</h3>
198 | <div class="outline-text-3" id="text-2-1">
199 | <p>
200 | Copy the scripts to some directory in the cluster, unpack the tarball
201 | in the same directory.  Run the gvp.sh script. As an illustration,
202 | let's say we want to run it every 10 seconds and 12 iterations
203 | (roughly two minutes of operation) - in practice, you might want to
204 | do that periodically, perhaps in a cron job, in order to see the behavior
205 | of the cluster over time:
206 | </p>
207 | <div class="org-src-container">
208 | 
209 | <pre class="src src-shell">./gvp.sh 10 12
210 | </pre>
211 | </div>
212 | <p>
213 | By default, the output file is called <code>gvp.log</code>. Then run the extract script
214 | on that output file:
215 | </p>
216 | <div class="org-src-container">
217 | 
218 | <pre class="src src-shell">python extract_glvolprof.py gvp.log
219 | </pre>
220 | </div>
221 | <p>
222 | The output (a bunch of CSV files and an HTML summary page) is placed in
223 | a subdirectory called <code>gvp.log_csvdir</code>.
224 | </p>
225 | <p>
226 | since we use pbench javascript graphing, then
227 | column 1 in the .csv is always the timestamp in milliseconds when
228 | that sample took place.  This can be disabled with the environment variable
229 | SKIP_PBENCH_GRAPHING.
230 | 
231 | the stat types are:
232 | </p>
233 | <li> pct-lat - percentage latency consumed by this FOP (file operation) </li>
234 | <li> avg-lat - average latency (usec) </li>
235 | <li> min-lat - minimum latency (usec) </li>
236 | <li> max-lat - maximum latency (usec) </li>
237 | <li>call-rate - how many FOP requests have been processed per second</li>
238 | <p>
239 | for each category, there are several kinds of .csv files produced.
240 | For each FOP + stat type, we show per-brick results and results across all bricks.
241 | For each stat type, across all bricks, we show FOP stats together in vol*allfop.csv files.
242 | We also show read/write traffic by brick in MBps*bricks.csv files.
243 | Finally we show read/write traffic for the entire volume in
244 | MBps*volume.csv files.
245 | </p>
246 | <p>
247 | To see the graphs, fire up a browser and point it to the URL that the extract
248 | script printed:
249 | </p>
250 | <div class="org-src-container">
251 | 
252 | <pre class="src src-shell">firefox gvp.log_csvdir/gvp-graphs.html
253 | </pre>
254 | </div>
255 | <p>
256 | BTW, not everything works: e.g. the "Save as Image" button does not. Note
257 | also that the layout is crucial: the CSV subdirectory contains a
258 | symlink <code>static</code>, which points to the <code>static</code> subdirectory in the
259 | main directory (which is where the javascript tarball was
260 | unpacked). If you change that structure, then the javascript files may
261 | not be found: no graphs!
262 | </p>
263 | </div>
264 | </div>
265 | 
266 | <div id="outline-container-orgheadline3" class="outline-3">
267 | <h3 id="orgheadline3"><span class="section-number-3">2.2</span> Simplest installation possible - on your own computer</h3>
268 | <div class="outline-text-3" id="text-2-2">
269 | <p>
270 | Alternatively, install everything as above somewhere on your own computer.
271 | Copy only the <code>gvp.sh</code> script to the cluster and run it there, then copy
272 | the <code>gvp.log</code> file back to your computer and process it locally as above.
273 | </p>
274 | </div>
275 | </div>
276 | 
277 | <div id="outline-container-orgheadline4" class="outline-3">
278 | <h3 id="orgheadline4"><span class="section-number-3">2.3</span> More general installation (requires HTTP server)</h3>
279 | <div class="outline-text-3" id="text-2-3">
280 | <p>
281 | For more general access, you might want to set up an HTTP server, either
282 | in the cluster itself or on some other accessible machine.
283 | </p>
284 | 
285 | <p>
286 | The idea is to run the <code>gvp.sh</code> script in the cluster and copy the
287 | <code>gvp.log</code> file (probably renaming it using a timestamp to keep the
288 | name unique) to the HTTP server, under the document root directory.
289 | The extraction script can then be run on it to produce the CSV
290 | subdirectory under the server's document root:
291 | </p>
292 | 
293 | <div class="org-src-container">
294 | 
295 | <pre class="src src-shell">python extract_glvolprof.py /var/www/html/gvp/gvp.log.TIMESTAMP
296 | </pre>
297 | </div>
298 | 
299 | <p>
300 | You can then view the results from a browser running elsewhere using
301 | a URL like
302 | </p>
303 | <pre class="example">
304 | http://SERVER.DOMAIN/gvp/gvp.log.TIMESTAMP_csvdir/gvp-graphs.html
305 | </pre>
306 | </div>
307 | </div>
308 | 
309 | <div id="outline-container-orgheadline5" class="outline-3">
310 | <h3 id="orgheadline5"><span class="section-number-3">2.4</span> Using ElasticSearch and Grafana - future direction</h3>
311 | <div class="outline-text-3" id="text-2-4">
312 | <p>
313 | It's also possible to index the CSV files into ElasticSearch and use
314 | Grafana for the visualization. That's probably the most flexible solution,
315 | but it would need some additional work, not included here.
316 | </p>
317 | </div>
318 | </div>
319 | </div>
320 | </div>
321 | <div id="postamble" class="status">
322 | <p class="date">Created: 2015-10-26 Mon 19:38</p>
323 | <p class="validation"><a href="http://validator.w3.org/check?uri=referer">Validate</a></p>
324 | </div>
325 | </body>
326 | </html>
327 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     {description}
294 |     Copyright (C) {year}  {fullname}
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   {signature of Ty Coon}, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 
341 | 


--------------------------------------------------------------------------------
/extract-gl-client-prof.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | #
  5 | # extract-gl-client-prof.py
  6 | # written by Ben England 2015
  7 | # copyright is GNU GPL V3, for details read:
  8 | #   https://tldrlegal.com/license/gnu-general-public-license-v3-%28gpl-3%29#fulltext
  9 | #
 10 | # Note: this tool uses a snapshot of javascript code from this project:
 11 | #   https://github.com/distributed-system-analysis/pbench
 12 | # but we do not support any use of this software outside of the graphing 
 13 | # of the data generated below.
 14 | #
 15 | # script to read gluster client-side output retrieved every N seconds
 16 | # and generate operation rate graph from it
 17 | #
 18 | # NOTE: the tool creates a subdirectory just for each run of this analysis tool.
 19 | # the directory name is just the name of the log file
 20 | # with the suffix '_csvdir'
 21 | #
 22 | # to install:
 23 | #   - extract javascript code from this tarball
 24 | #      https://s3.amazonaws.com/ben.england/gvp-graph-javascript.tgz
 25 | #   - if the directory containing your gluster volume output log is different, create a
 26 | #   'static' symlink pointing to the static/ subdirectory you just extracted
 27 | #     in the subdirectories where .csv and .html files live, you will see a 
 28 | #     'static' softlink pointing to this symlink.
 29 | #
 30 | # input:
 31 | #  this script expects input data to look like what this script produces:
 32 | #
 33 | #  https://raw.githubusercontent.com/bengland2/parallel-libgfapi/master/gvp-client.sh
 34 | #
 35 | #  record 1 contains the user-specified sample count and interval
 36 | #  used by gvp-client.sh.
 37 | #  record 2 is a timestamp generated by gluster in format like:
 38 | #    Wed Oct 21 22:50:28 UTC 2015
 39 | #  subsequent "gluster volume profile your-volume info" outputs are 
 40 | #  concatenated to the profile log.  
 41 | #  Each profile sample is assumed to happen approximately N seconds after
 42 | #  the preceding sample, where N is the gvp.sh sampling interval.
 43 | #  seconds.  The first sample happens N seconds after the timestamp.
 44 | #
 45 | # output:
 46 | #
 47 | #  when we're all done reading in data,
 48 | #  we then print it out in a format suitable for spreadsheet-based graphing
 49 | #
 50 | #  since we use pbench javascript graphing, then
 51 | #  column 1 in the .csv is always the timestamp in milliseconds when
 52 | #  that sample took place.  This can be disabled with the environment variable
 53 | #  SKIP_PBENCH_GRAPHING.
 54 | #
 55 | #  the stat types are:
 56 | #  - pct-lat - percentage latency consumed by this FOP (file operation)
 57 | #  - avg-lat - average latency (usec)
 58 | #  - min-lat - minimum latency (usec)
 59 | #  - max-lat - maximum latency (usec)
 60 | #  - call-rate - how many FOP requests have been processed per second
 61 | #  for each category:
 62 | #  - for each stat type, show stat by FOP
 63 | #
 64 | # internals:
 65 | #
 66 | # the "intervals" array, indexed by interval number, stores results over time
 67 | # within each array element, we have IntervalProfile objects containing
 68 | # bytes read/written and a dictionary indexed by FOP name
 69 | # containing FopProfile instances to represent the per-FOP records
 70 | # in "gluster volume profile" output.
 71 | # the per-FOP dictionary is indexed by FOP name
 72 | #
 73 | 
 74 | import sys
 75 | import os
 76 | from os.path import join
 77 | import re
 78 | import time
 79 | import shutil
 80 | import collections
 81 | 
 82 | # fields in gluster volume profile output
 83 | 
 84 | stat_names = ['pct-lat', 'avg-lat', 'min-lat', 'max-lat', 'call-rate']
 85 | directions = ['MBps-read', 'MBps-written']
 86 | min_lat_infinity = 1.0e24
 87 | 
 88 | # this environment variable lets you graph .csv files using pbench
 89 | 
 90 | pbench_graphs = True
 91 | if os.getenv('SKIP_PBENCH_GRAPHING'): pbench_graphs = False
 92 | 
 93 | # this is the list of graphs that will be produced
 94 | 
 95 | graph_csvs = [
 96 |     ('MBps-written', 'MB/sec written to Gluster volume'), 
 97 |     ('MBps-read', 'MB/sec read from Gluster volume'),
 98 |     ('call-rate', 'FOP call rates'),
 99 |     ('pct-lat', 'percentage latency by FOP')
100 | ]
101 | 
102 | # all gvp.sh-generated profiles are expected to have these parameters
103 | # we define them here to have global scope, and they are only changed
104 | # by the input parser
105 | 
106 | start_time = None
107 | expected_duration = None
108 | expected_sample_count = None
109 | sorted_fop_names = None
110 | intervals = None
111 | 
112 | # this class stores per-fop statistics from gluster client profile output
113 | # to compute stats for %latency and average latency across a set of bricks,
114 | # we have to compute averages weighted by FOP calls
115 | # We do this in two steps:
116 | # - loop over set of instances and compute weighted sum (not average)
117 | # - after loop, normalize using total calls
118 | 
119 | 
120 | class FopProfile:
121 | 
122 |     def __init__(self, avg_lat, min_lat, max_lat, calls):
123 |         self.avg_lat = avg_lat
124 |         self.min_lat = min_lat
125 |         self.max_lat = max_lat
126 |         self.calls = calls
127 |         self.pct_lat = 0.0  # will compute later
128 | 
129 |     def __str__(self):
130 |         return '%6.2f, %8.0f, %8.0f, %8.0f, %d' % (
131 |             self.pct_lat, self.avg_lat, self.min_lat, self.max_lat, self.calls)
132 | 
133 |     # append a single field to .csv record based on statistic type
134 |     # use "-6.2f" instead of "%6.2f" so there are no leading spaces in record,
135 |     # otherwise spreadsheet inserts colums at col. B
136 | 
137 |     def field2str(self, stat, duration):
138 |         if stat == stat_names[0]:
139 |             return '%-6.2f' % self.pct_lat
140 |         elif stat == stat_names[1]:
141 |             return '%8.0f' % self.avg_lat
142 |         elif stat == stat_names[2]:
143 |             if self.min_lat == min_lat_infinity:
144 |                 return ''  # don't confuse spreadsheet/user
145 |             else:
146 |                 return '%8.0f' % self.min_lat
147 |         elif stat == stat_names[3]:
148 |             if self.max_lat == 0:
149 |                 return ''
150 |             else:
151 |                 return '%8.0f' % self.max_lat
152 |         elif stat == stat_names[4]:
153 |             call_rate = self.calls / float(duration)
154 |             return '%10.3f' % call_rate
155 | 
156 |     # accumulate weighted sum of component profiles, will normalize them later
157 | 
158 |     def accumulate(self, addend):
159 |         self.avg_lat += (addend.avg_lat * addend.calls)
160 |         self.calls += addend.calls
161 |         if addend.calls > 0:
162 |             self.max_lat = max(self.max_lat, addend.max_lat)
163 |             self.min_lat = min(self.min_lat, addend.min_lat)
164 | 
165 |     # normalize weighted sum to get averages
166 | 
167 |     def normalize_sum(self):
168 |         try:
169 |             # totals will become averages
170 |             self.avg_lat /= self.calls
171 |         except ZeroDivisionError:  # if no samples, set these stats to zero
172 |             self.pct_lat = 0.0
173 |             self.avg_lat = 0.0
174 | 
175 |     # compute % latency for this FOP given total latency of all FOPs
176 | 
177 |     def get_pct_lat(self, total_lat):
178 |         try:
179 |             self.pct_lat = 100.0 * (self.avg_lat * self.calls) / total_lat
180 |         except ZeroDivisionError:  # if no samples, set these stats to zero
181 |             self.pct_lat = 0.0
182 | 
183 | 
184 | class ProfileInterval:
185 | 
186 |     def __init__(self):
187 |         self.bytes_read = None
188 |         self.bytes_written = None
189 |         self.duration = None
190 |         self.fop_profiles = {}
191 | 
192 |     def __str__(self):
193 |         return '%d, %d, %s, %s'%(
194 |             self.bytes_read, self.bytes_written, 
195 |             str(self.duration), [ str(f) + ' : ' + str(self.fop_profiles[f]) for f in self.fop_profiles ])
196 | 
197 | 
198 | # if there is an error parsing the input...
199 | 
200 | def usage(msg):
201 |     print('ERROR: %s' % msg)
202 |     print('usage: extract-gl-client-prof.py your-gluster-client-profile.log')
203 |     sys.exit(1)
204 | 
205 | 
206 | # segregate .csv files into a separate output directory
207 | # with pathname derived from the input log file with _csvdir suffix
208 | 
209 | def make_out_dir(path):
210 |     dir_path = path + '_csvdir'
211 |     try:
212 |         if os.path.exists(dir_path):
213 |             shutil.rmtree(dir_path)
214 |         os.mkdir(dir_path)
215 |     except IOError:
216 |         usage('could not (re-)create directory ' + dir_path)
217 |     return dir_path
218 | 
219 | 
220 | # convert gvp-client.sh client profile output
221 | # into a time series of per-fop results.
222 | 
223 | def parse_input(input_pathname):
224 |     global start_time
225 |     global expected_sample_interval
226 |     global expected_sample_count
227 |     global sorted_fop_names
228 |     global intervals
229 | 
230 |     try:
231 |         with open(input_pathname, 'r') as file_handle:
232 |             lines = [ l.strip() for l in file_handle.readlines() ]
233 |     except IOError:
234 |         usage('could not read ' + input_pathname)
235 |     tokens = lines[0].split()
236 |     expected_sample_count = int(tokens[0])
237 |     expected_sample_interval = int(tokens[1])
238 |     start_time = time.mktime(
239 |             time.strptime(
240 |                     lines[1], '%a %b %d %H:%M:%S %Z %Y')) * 1000
241 |     print('collection started at %s' % lines[1])
242 |     print('sampling interval is %d seconds' % expected_sample_interval)
243 |     print('expected sample count is %d samples' % expected_sample_count)
244 | 
245 |     # parse the file and record each cell of output in a way that lets you
246 |     # aggregate across bricks later
247 | 
248 |     found_cumulative_output = False
249 |     found_interval_output = False
250 |     all_caps_name = re.compile('^[A-Z]{3,15}')
251 |     fop_names = set()
252 |     last_intvl = -2
253 |     intvl = -1
254 |     per_op_table = {}
255 |     sample = -1
256 |     intervals = []
257 |     for ln in lines[2:]:
258 |         tokens = ln.split()
259 | 
260 |         if ln.__contains__('Interval') and ln.__contains__('stats'):
261 | 
262 |             interval_number = int(tokens[2])
263 |             assert intvl == last_intvl + 1
264 |             last_intvl = intvl
265 |             intvl += 1
266 |             intvl_profile = ProfileInterval()
267 |             intervals.append(intvl_profile)
268 |             found_interval_output = True
269 | 
270 |         elif ln.__contains__('Cumulative Stats'):
271 | 
272 |             found_cumulative_output = True
273 | 
274 |         elif ln.__contains__('Duration :'):
275 | 
276 |             # we are at end of output for this brick and interval
277 | 
278 |             assert found_cumulative_output ^ found_interval_output
279 |             duration = int(tokens[2])
280 |             diff_from_expected = abs(duration - expected_sample_interval)
281 |             if found_interval_output:
282 |                 if diff_from_expected > 1:
283 |                     print(('WARNING: in sample %d the sample ' +
284 |                            'interval %d deviates from expected value %d') %
285 |                             (sample, duration, expected_sample_interval))
286 |                 fops_in_interval = intervals[intvl]
287 |                 fops_in_interval.duration = duration
288 | 
289 |         elif ln.__contains__('BytesRead'):
290 | 
291 |             if found_interval_output:
292 |                 intvl_profile = intervals[intvl]
293 |                 intvl_profile.bytes_read = int(tokens[2])
294 | 
295 |         elif ln.__contains__('BytesWritten'):
296 | 
297 |             if found_interval_output:
298 |                 intvl_profile = intervals[intvl]
299 |                 intvl_profile.bytes_written = int(tokens[2])
300 | 
301 |         elif ln.__contains__('Cumulative stats'):
302 | 
303 |                 # this is the end of this sample
304 | 
305 |                 found_interval_output = False
306 |                 found_cumulative_output = True
307 | 
308 |         elif ln.__contains__('Current open fd'):
309 | 
310 |                 found_cumulative_output = False
311 | 
312 |         elif found_interval_output and all_caps_name.match(ln):
313 | 
314 |             # we found a record we're interested in,
315 |             # accumulate table of data for each gluster function
316 | 
317 |             sample += 1
318 |             intvl_profile = intervals[intvl]
319 |             fop_name = tokens[0]
320 |             fop_names.add(fop_name)
321 |             new_fop_profile = FopProfile(
322 |                     float(tokens[2]), float(tokens[4]), float(tokens[6]),
323 |                     float(tokens[1]))
324 |             try:
325 |                 fop_stats = intvl_profile.fop_profiles[fop_name]
326 |                 raise Exception('did not expect fop already defined: %s' %
327 |                         str(intvl_profile))
328 |             except KeyError:
329 |                 intvl_profile.fop_profiles[fop_name] = new_fop_profile
330 |     sorted_fop_names = sorted(fop_names)
331 | 
332 | 
333 | # generate timestamp_ms column for pbench 
334 | # given starting time of collection, sampling interval and sample number
335 | 
336 | def gen_timestamp_ms(sample_index):
337 |     return start_time + ((expected_sample_interval * sample_index) * 1000)
338 | 
339 | 
340 | # generate denominator for call rate computation based on duration type
341 | 
342 | def get_interval(interval_index, duration_type = 'interval'):
343 |     if duration_type == 'cumulative':
344 |         return interval_index * float(expected_sample_interval)
345 |     else:
346 |         return float(expected_sample_interval)
347 | 
348 | # display bytes read and bytes written
349 | # normalize to MB/s with 3 decimal places so 1 KB/s/brick will show
350 | 
351 | def gen_output_bytes(out_dir_path):
352 |     bytes_per_MB = 1000000.0
353 |     for direction in directions:
354 |         # when we support cumulative data, then we can name files this way
355 |         #direction_filename = duration_type + '_' + direction + '.csv'
356 |         direction_filename = direction + '.csv'
357 |         direction_pathname = join(out_dir_path, direction_filename)
358 |         with open(direction_pathname, 'w') as transfer_fh:
359 |             if pbench_graphs: 
360 |                 transfer_fh.write('timestamp_ms, ')
361 |             transfer_fh.write('MB/s\n')
362 |             for j in range(0, len(intervals)):
363 |                 if pbench_graphs:
364 |                     transfer_fh.write('%d, ' % gen_timestamp_ms(j))
365 |                 rate_interval = get_interval(j) 
366 |                 interval_profile = intervals[j]
367 |                 if direction.__contains__('read'):
368 |                     transfer = interval_profile.bytes_read
369 |                 else:
370 |                     transfer = interval_profile.bytes_written
371 |                 transfer_fh.write('%-8.3f\n' % 
372 |                     ((transfer/rate_interval)/bytes_per_MB))
373 | 
374 | # display per-FOP (file operation) stats,
375 | 
376 | def gen_per_fop_stats(out_dir_path, stat, duration_type='interval'):
377 |     per_fop_filename = stat + '.csv'
378 |     per_fop_path = join(out_dir_path, per_fop_filename)
379 |     with open(per_fop_path, 'a') as fop_fh:
380 |         hdr = ''
381 |         if pbench_graphs:
382 |             hdr += 'timestamp_ms, '
383 |         hdr += ','.join(sorted_fop_names)
384 |         hdr += '\n'
385 |         fop_fh.write(hdr)
386 |         for i in range(0, len(intervals)):
387 |             interval_profile = intervals[i]
388 |             fops_in_interval = interval_profile.fop_profiles
389 |             all_fop_profile = FopProfile(0, 0, 0, 0)
390 |             for fop in sorted_fop_names:
391 |                 fop_stats = fops_in_interval[fop]
392 |                 all_fop_profile.accumulate(fop_stats)
393 |             all_fop_profile.normalize_sum()
394 |             #print('intvl: %d' % i)
395 |             #print('ALL FOPs: %s' % all_fop_profile)
396 |             if pbench_graphs:
397 |                 fop_fh.write('%d, ' % gen_timestamp_ms(i))
398 |             columns = []
399 |             for fop in sorted_fop_names:
400 |                 fop_stats = fops_in_interval[fop]
401 |                 fop_stats.get_pct_lat(
402 |                     all_fop_profile.avg_lat * all_fop_profile.calls)
403 |                 try:
404 |                     fop_stats = fops_in_interval[fop]
405 |                 except KeyError:
406 |                     fops_in_interval[fop] = fop_stats
407 |                 columns.append(
408 |                     fop_stats.field2str(
409 |                         stat, interval_profile.duration))
410 |             fop_fh.write(','.join(columns) + '\n')
411 | 
412 | # generate graphs in 
413 | # generate output files in separate directory from
414 | # data structure returned by parse_input
415 | 
416 | next_graph_template='''
417 |     <div class="chart">
418 |       <h3 class="chart-header">%s
419 |         <button id="save1">Save as Image</button>
420 |         <div id="svgdataurl1"></div>
421 |       </h3>
422 |       <svg id="chart%d"></svg>
423 |       <canvas id="canvas1" style="display:none"></canvas>
424 |       <script>
425 |         constructChart("lineChart", %d, "%s", 0.00);
426 |       </script>
427 |     </div>
428 | '''
429 | 
430 | def output_next_graph(graph_fh, gr_index):
431 |     (csv_filename, graph_description) = graph_csvs[gr_index]
432 |     gr_index += 1  # graph numbers start at 1
433 |     graph_fh.write( next_graph_template % (
434 |                     graph_description, gr_index, gr_index, csv_filename))
435 | 
436 | # static content of HTML file
437 | 
438 | header='''
439 | <!DOCTYPE HTML>
440 | <html>
441 |   <head>
442 |     <meta charset="utf-8">
443 |     <link href="static/css/v0.2/nv.d3.css" rel="stylesheet" type="text/css" media="all">
444 |     <link href="static/css/v0.2/pbench_utils.css" rel="stylesheet" type="text/css" media="all">
445 |     <script src="static/js/v0.2/function-bind.js"></script>
446 |     <script src="static/js/v0.2/fastdom.js"></script>
447 |     <script src="static/js/v0.2/d3.js"></script>
448 |     <script src="static/js/v0.2/nv.d3.js"></script>
449 |     <script src="static/js/v0.2/saveSvgAsPng.js"></script>
450 |     <script src="static/js/v0.2/pbench_utils.js"></script>
451 |   </head>
452 |   <body class="with-3d-shadow with-transitions">
453 |     <h2 class="page-header">summary profile of application activity on one client</h2>
454 | '''
455 | 
456 | trailer='''
457 |   </body>
458 | </html>
459 | '''
460 | 
461 | 
462 | # generate graphs using header, trailer and graph template
463 | 
464 | def gen_graphs(out_dir_path):
465 |     graph_path = join(out_dir_path, 'gvp-client-graphs.html')
466 |     with open(graph_path, 'w') as graph_fh:
467 |         graph_fh.write(header)
468 |         for j in range(0, len(graph_csvs)):
469 |             output_next_graph(graph_fh, j)
470 |         graph_fh.write(trailer)
471 |     return graph_path
472 | 
473 | 
474 | # make link to where javascript etc lives in unpacked tarball
475 | # ASSUMPTION is that output directory is a subdirectory of where this script
476 | # lives (not a sub-subdirectory).  Sorry but that's the only way to generate a
477 | # softlink that works when we copy the csvdir to a different location.
478 | 
479 | def gen_static_softlink(out_dir_path):
480 |     saved_cwd = os.getcwd()
481 |     static_dir = join(saved_cwd, 'static')
482 |     if not os.path.exists(static_dir):
483 |         print('ERROR: sorry, the javascript directory "static" ' + 
484 |               'needs to be in same directory as this script, trying anyway...')
485 |     os.chdir(out_dir_path)
486 |     os.symlink(join('..', 'static'), 'static')
487 |     os.chdir(saved_cwd)
488 | 
489 | # generate everything needed to view the graphs
490 | 
491 | def generate_output(out_dir_path):
492 | 
493 |     gen_output_bytes(out_dir_path)
494 |     for s in stat_names:
495 |         gen_per_fop_stats(out_dir_path, s)
496 |     graph_path = gen_graphs(out_dir_path)
497 |     gen_static_softlink(out_dir_path)
498 | 
499 |     sys.stdout.write('Gluster FOP types seen: ')
500 |     for fop_name in sorted_fop_names:
501 |         sys.stdout.write(' ' + fop_name)
502 |     sys.stdout.write('\n')
503 |     print('created Gluster statistics files in directory %s' % out_dir_path)
504 |     if not os.path.isabs(graph_path):
505 |         graph_path = join(os.getcwd(), graph_path)
506 |     print('graphs now available at browser URL file://%s' % graph_path)
507 | 
508 | 
509 | # the main program is kept in a subroutine so that it can run on Windows.
510 | 
511 | def main():
512 |     if len(sys.argv) < 2:
513 |         usage('missing gluster volume profile output log filename parameter'
514 |               )
515 |     fn = sys.argv[1]
516 |     parse_input(fn)
517 |     outdir = make_out_dir(fn)
518 |     generate_output(outdir)
519 | 
520 | main()
521 | 


--------------------------------------------------------------------------------
/extract-glvolprof.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | #
  5 | # extract-glvolprof.py
  6 | # written by Ben England 2015
  7 | # copyright is GNU GPL V3, for details read:
  8 | #   https://tldrlegal.com/license/gnu-general-public-license-v3-%28gpl-3%29#fulltext
  9 | #
 10 | # script to read gluster volume profile output retrieved every N seconds
 11 | # and generate operation rate graph from it
 12 | #
 13 | # see gvp-README.html in the same directory for directions on use.
 14 | #
 15 | # Note: this tool uses a snapshot of javascript code from this project:
 16 | #   https://github.com/distributed-system-analysis/pbench
 17 | # but we do not support any use of this software outside of the graphing 
 18 | # of the data generated below.  The Save Image button does not work yet
 19 | #
 20 | # input:
 21 | #  this script expects input data to look like what the gvp.sh script (in same
 22 | #  directory) produces:
 23 | #
 24 | #  record 1 is a timestamp in format YYYY-MM-DD-HH-MM
 25 | #  record 2 contains the user-specified sample interval and count
 26 | #  used by gvp.sh.
 27 | #  subsequent "gluster volume profile your-volume info" outputs are 
 28 | #  concatenated to the profile log.  
 29 | #  Each profile sample happens exactly N seconds after
 30 | #  the preceding sample, where N is the gvp.sh sampling interval.
 31 | #  seconds.  The first sample happens N seconds after the timestamp.
 32 | #
 33 | # output:
 34 | #
 35 | #  when we're all done reading in data,
 36 | #  we then print it out in a format suitable for spreadsheet-based graphing
 37 | #
 38 | # internals:
 39 | #
 40 | # the "intervals" array, indexed by interval number, stores results over time
 41 | # within each array element,
 42 | # we have a dictionary indexed by brick name containing BrickProfile instances
 43 | # these in turn contain a dictionary of BrickFopProfile instances
 44 | # to represent the per-FOP records in "gluster volume profile" output
 45 | #
 46 | # the per-brick dictionary is indexed by a string
 47 | # starting with 'cumul' or 'intvl' and ending with the FOP name
 48 | # this isn't strictly necessary but provides latent support
 49 | # for someday including cumulative stats as well as per-interval stats
 50 | #
 51 | # stats for the entire volume are rolled up using call rates for weighted averaging
 52 | #
 53 | 
 54 | import sys
 55 | import os
 56 | from os.path import join
 57 | import re
 58 | import time
 59 | import shutil
 60 | import collections
 61 | 
 62 | # fields in gluster volume profile output
 63 | 
 64 | time_duration_types = ['cumulative', 'interval']
 65 | stat_names = ['pct-lat', 'avg-lat', 'min-lat', 'max-lat', 'call-rate']
 66 | directions = ['MBps-read', 'MBps-written']
 67 | min_lat_infinity = 1.0e24
 68 | 
 69 | # this environment variable lets you graph .csv files using pbench
 70 | 
 71 | pbench_graphs = True
 72 | if os.getenv('SKIP_PBENCH_GRAPHING'): pbench_graphs = False
 73 | 
 74 | # this is the list of graphs that will be produced
 75 | 
 76 | graph_csvs = [
 77 |     ('vol_call-rate_allfop', 'volume-level FOP call rates'),
 78 |     ('vol_pct-lat_allfop', 'percentage server-side latency by FOP'),
 79 |     ('MBps-written-volume', 'MB/sec written to Gluster volume'), 
 80 |     ('MBps-read-volume', 'MB/sec read from Gluster volume'),
 81 |     ('MBps-written-bricks', 'MB/sec written to Gluster bricks'), 
 82 |     ('MBps-read-bricks', 'MB/sec read from Gluster bricks')
 83 | ]
 84 | 
 85 | # all gvp.sh-generated profiles are expected to have these parameters
 86 | # we define them here to have global scope, and they are only changed
 87 | # by the input parser
 88 | 
 89 | start_time = None
 90 | expected_duration = None
 91 | expected_sample_count = None
 92 | sorted_fop_names = None
 93 | sorted_brick_names = None
 94 | intervals = None
 95 | 
 96 | # this class stores per-fop statistics from gluster volume profile output
 97 | # to compute stats for %latency and average latency across a set of bricks,
 98 | # we have to compute averages weighted by brick usage.
 99 | # We do this in two steps:
100 | # - loop over set of instances and compute weighted sum (not average)
101 | # - after loop, normalize using total calls
102 | 
103 | 
104 | class BrickFopProfile:
105 | 
106 |     def __init__(self, pct_lat, avg_lat, min_lat, max_lat, calls):
107 |         self.pct_lat = pct_lat
108 |         self.avg_lat = avg_lat
109 |         self.min_lat = min_lat
110 |         self.max_lat = max_lat
111 |         self.calls = calls
112 | 
113 |     def __str__(self):
114 |         return '%6.2f, %8.0f, %8.0f, %8.0f, %d' % (
115 |             self.pct_lat, self.avg_lat, self.min_lat, self.max_lat, self.calls)
116 | 
117 |     # append a single field to .csv record based on statistic type
118 |     # use "-6.2f" instead of "%6.2f" so there are no leading spaces in record,
119 |     # otherwise spreadsheet inserts colums at col. B
120 | 
121 |     def field2str(self, stat, duration):
122 |         if stat == stat_names[0]:
123 |             return '%-6.2f' % self.pct_lat
124 |         elif stat == stat_names[1]:
125 |             return '%8.0f' % self.avg_lat
126 |         elif stat == stat_names[2]:
127 |             if self.min_lat == min_lat_infinity:
128 |                 return ''  # don't confuse spreadsheet/user
129 |             else:
130 |                 return '%8.0f' % self.min_lat
131 |         elif stat == stat_names[3]:
132 |             if self.max_lat == 0:
133 |                 return ''
134 |             else:
135 |                 return '%8.0f' % self.max_lat
136 |         elif stat == stat_names[4]:
137 |             call_rate = self.calls / float(duration)
138 |             return '%9.2d' % call_rate
139 | 
140 |     # accumulate weighted sum of component profiles, will normalize them later
141 | 
142 |     def accumulate(self, addend):
143 |         self.pct_lat += (addend.pct_lat * addend.calls)
144 |         self.avg_lat += (addend.avg_lat * addend.calls)
145 |         if addend.calls > 0:
146 |             self.max_lat = max(self.max_lat, addend.max_lat)
147 |             self.min_lat = min(self.min_lat, addend.min_lat)
148 |         self.calls += addend.calls
149 | 
150 |     # normalize weighted sum to get averages
151 | 
152 |     def normalize_sum(self):
153 |         try:
154 |             # totals will become averages
155 |             self.pct_lat /= self.calls
156 |             self.avg_lat /= self.calls
157 |         except ZeroDivisionError:  # if no samples, set these stats to zero
158 |             self.pct_lat = 0.0
159 |             self.avg_lat = 0.0
160 | 
161 | 
162 | def zero_bfprofile():
163 |     # variable to accumulate stats across all bricks
164 |     # for min, use some very large number
165 |     # that latency will never exceed so that
166 |     # min(lat, all_min_lat) == lat
167 |     # same for max, use a lower bound for latency (0)
168 |     # so max(lat, all_max_lat) = lat
169 |     return BrickFopProfile(0.0, 0.0, min_lat_infinity, 0.0, 0)
170 | 
171 | 
172 | # this class stores per-brick results
173 | 
174 | class BrickProfile:
175 | 
176 |     def __init__(self):
177 |         self.bytes_read = 0
178 |         self.bytes_written = 0
179 |         self.interval = 0  # seconds, so DivisionByZero exception if not set
180 |         # BrickFopProfile results stored in dictionary indexed by FOP name
181 |         self.per_fop = {}
182 | 
183 |     def __str__(self):
184 |         return '%d, %d, %s' % (
185 |             self.bytes_read, self.bytes_written, str(self.per_fop))
186 | 
187 | 
188 | # if there is an error parsing the input...
189 | 
190 | def usage(msg):
191 |     print('ERROR: %s' % msg)
192 |     print('usage: extract-glvolprof.py your-gluster-volume-profile.log')
193 |     sys.exit(1)
194 | 
195 | 
196 | # because we produce so many .csv files, segregate them into a separate output
197 | # directory with pathname derived from the input log file with _csvdir suffix
198 | 
199 | def make_out_dir(path):
200 |     dir_path = path + '_csvdir'
201 |     try:
202 |         if os.path.exists(dir_path):
203 |             shutil.rmtree(dir_path)
204 |         os.mkdir(dir_path)
205 |     except IOError:
206 |         usage('could not (re-)create directory ' + dir_path)
207 |     return dir_path
208 | 
209 | 
210 | # convert gluster volume profile output
211 | # into a time series of per-brick per-fop results.
212 | 
213 | def parse_input(input_pathname):
214 |     global start_time
215 |     global expected_sample_interval
216 |     global expected_sample_count
217 |     global sorted_fop_names
218 |     global sorted_brick_names
219 |     global intervals
220 | 
221 |     try:
222 |         with open(input_pathname, 'r') as file_handle:
223 |             lines = [ l.strip() for l in file_handle.readlines() ]
224 |     except IOError:
225 |         usage('could not read ' + input_pathname)
226 |     start_time = time.mktime(time.strptime(lines[0], '%Y-%m-%d-%H-%M')) * 1000
227 |     tokens = lines[1].split()
228 |     expected_sample_interval = int(tokens[0])
229 |     expected_sample_count = int(tokens[1])
230 |     print('collection started at %s' % lines[0])
231 |     print('sampling interval is %d seconds' % expected_sample_interval)
232 |     print('expected sample count is %d samples' % expected_sample_count)
233 | 
234 |     # parse the file and record each cell of output in a way that lets you
235 |     # aggregate across bricks later
236 | 
237 |     found_cumulative_output = False
238 |     found_interval_output = False
239 |     all_caps_name = re.compile('.*[A-Z]+$')
240 |     fop_names = set()
241 |     last_intvl = -1
242 |     intvl = -1
243 |     per_op_table = {}
244 |     sample = -1
245 |     intervals = []
246 |     bricks_seen = {}
247 |     duration = None
248 |     for ln in lines[2:]:
249 |         tokens = ln.strip().split()
250 | 
251 |         if ln.startswith('Brick:'):
252 | 
253 |             brick_name = tokens[1]
254 |             try:
255 |                 brick_count = bricks_seen[brick_name]
256 |             except KeyError:
257 |                 brick_count = 0
258 |             if brick_count == intvl + 1:
259 |                 intvl += 1
260 |             else:
261 |                 assert brick_count == intvl
262 |             brick_count += 1
263 |             bricks_seen[brick_name] = brick_count
264 | 
265 |         elif ln.__contains__('Interval') and ln.__contains__('Stats'):
266 | 
267 |             assert intvl == last_intvl or intvl == last_intvl + 1
268 |             last_intvl = intvl
269 |             found_interval_output = True
270 | 
271 |         elif ln.__contains__('Cumulative Stats'):
272 | 
273 |             found_cumulative_output = True
274 | 
275 |         elif ln.__contains__('Duration:'):
276 | 
277 |             # we are at end of output for this brick and interval
278 | 
279 |             assert found_cumulative_output ^ found_interval_output
280 |             duration = int(tokens[1])
281 |             if found_interval_output and \
282 |                abs(duration - expected_sample_interval) > 1:
283 |                 print(('WARNING: in sample %d brick %d the sample ' +
284 |                        'interval %d deviates from expected value %d') %
285 |                       (brick_count, sample, duration, expected_sample_interval))
286 |             bricks_in_interval = intervals[intvl]
287 |             brick = bricks_in_interval[brick_name]
288 |             brick.interval = duration
289 |             for fop in fop_names:
290 |                 for typ in time_duration_types:
291 |                     k = fop + '.' + typ
292 |                     try:
293 |                         bfprofile = brick.per_fop[k]
294 |                     except KeyError:
295 |                         bfprofile = zero_bfprofile()
296 |                         brick.per_fop[k] = bfprofile
297 | 
298 |         elif ln.__contains__('Data Read:'):
299 | 
300 |             bytes_read = int(tokens[2])
301 |             per_brick_info = bricks_in_interval[brick_name]
302 |             per_brick_info.bytes_read = bytes_read
303 | 
304 |         elif ln.__contains__('Data Written'):
305 | 
306 |             bytes_written = int(tokens[2])
307 |             per_brick_info = bricks_in_interval[brick_name]
308 |             per_brick_info.bytes_written = bytes_written
309 | 
310 |             # this is the end of per-brick results
311 | 
312 |             found_interval_output = False
313 |             found_cumulative_output = False
314 | 
315 |         elif (found_interval_output or found_cumulative_output) \
316 |              and all_caps_name.match(ln):
317 | 
318 |             # we found a record we're interested in,
319 |             # accumulate table of data for each gluster function
320 | 
321 |             sample += 1
322 |             new_bfprofile = BrickFopProfile(
323 |                     float(tokens[0]), float(tokens[1]), float(tokens[3]),
324 |                     float(tokens[5]), int(tokens[7]))
325 |             op_name = tokens[8]
326 | 
327 |             # op name is a key into dictionary,
328 |             # we record both per-interval and cumulative stats
329 | 
330 |             fop_names.add(op_name)
331 | 
332 |             if found_interval_output:  # keep cum. and interval stats separated
333 |                 op_name += '.' + time_duration_types[1]
334 |             elif found_cumulative_output:
335 |                 op_name += '.' + time_duration_types[0]
336 |             else:
337 |                 raise Exception('FOP-like string %s found outside stats'
338 |                                  % op_name)
339 | 
340 |             if len(intervals) == intvl:
341 |                 bricks_in_interval = {}
342 |                 intervals.append(bricks_in_interval)
343 |             elif len(intervals) == intvl + 1:
344 |                 bricks_in_interval = intervals[intvl]
345 |             else:
346 |                 raise Exception(('intervals table length %d ' +
347 |                                  'does not match interval number %d')
348 |                                  % (len(intervals), intvl))
349 | 
350 |             try:
351 |                 fop_stats = bricks_in_interval[brick_name].per_fop
352 |             except KeyError:
353 |                 bricks_in_interval[brick_name] = BrickProfile()
354 |                 fop_stats = bricks_in_interval[brick_name].per_fop
355 | 
356 |             fop_stats[op_name] = new_bfprofile
357 | 
358 |     sorted_brick_names = sorted(bricks_seen.keys())
359 |     sorted_fop_names = sorted(fop_names)
360 |     return (start_time, intervals)
361 | 
362 | 
363 | # generate timestamp_ms column for pbench 
364 | # given starting time of collection, sampling interval and sample number
365 | 
366 | def gen_timestamp_ms(sample_index):
367 |     return start_time + ((expected_sample_interval * sample_index) * 1000)
368 | 
369 | 
370 | # generate denominator for call rate computation based on duration type
371 | # can't use brick.interval
372 | 
373 | def get_interval(duration_type, interval_index):
374 |     if duration_type == 'cumulative':
375 |         return interval_index * expected_sample_interval
376 |     else:
377 |         return expected_sample_interval
378 | 
379 | # display bytes read and bytes written per brick and for entire volume
380 | # in separate graphs.  If we put them in the same graph in a volume with
381 | # 16 bricks, for example, all you'll see is the per-volume number
382 | # normalize to MB/s with 3 decimal places so 1 KB/s/brick will show
383 | 
384 | def gen_output_bytes(out_dir_path, duration_type):
385 |     bytes_per_MB = 1000000.0
386 |     final_brick_ct = len(sorted_brick_names)
387 |     for direction in directions:
388 |       per_vol_filename = direction + '-volume.csv'
389 |       per_vol_pathname = join(out_dir_path, per_vol_filename)
390 |       with open(per_vol_pathname, 'w') as total_transfer_fh:
391 |         # when we support cumulative data, then we can name files this way
392 |         #direction_filename = duration_type + '_' + direction + '.csv'
393 |         per_brick_filename = direction + '-bricks.csv'
394 |         per_brick_pathname = join(out_dir_path, per_brick_filename)
395 |         with open(per_brick_pathname, 'w') as transfer_fh:
396 |             if pbench_graphs: 
397 |                 transfer_fh.write('timestamp_ms, ')
398 |                 total_transfer_fh.write('timestamp_ms, ')
399 |             transfer_fh.write(','.join(sorted_brick_names))
400 |             total_transfer_fh.write('all\n')
401 |             transfer_fh.write('\n')
402 |             intvl = 0
403 |             for bricks_in_interval in intervals:
404 |                 if pbench_graphs:
405 |                     transfer_fh.write('%d, ' % gen_timestamp_ms(intvl))
406 |                 intvl += 1
407 |                 rate_interval = get_interval(duration_type, intvl) 
408 |                 total_transfer = 0
409 |                 columns = []
410 |                 for b in sorted_brick_names:  # for each brick
411 |                     brick = bricks_in_interval[b]
412 |                     if direction.__contains__('read'):
413 |                         transfer = brick.bytes_read
414 |                     else:
415 |                         transfer = brick.bytes_written
416 |                     total_transfer += transfer
417 |                     columns.append( '%-8.3f ' % ((transfer/rate_interval)/bytes_per_MB))
418 |                 transfer_fh.write(','.join(columns) + '\n')
419 |                 total_transfer_fh.write('%d, %-9.3f\n' % (
420 |                         gen_timestamp_ms(intvl),
421 |                         (total_transfer/rate_interval)/bytes_per_MB))
422 | 
423 | 
424 | # display per-FOP (file operation) stats,
425 | # both per brick and across all bricks
426 | 
427 | def gen_per_fop_stats(out_dir_path, duration_type, stat):
428 |     vol_fop_intervals = []
429 |     for fop in sorted_fop_names:
430 |         #per_fop_filename = duration_type + '_' + stat + '_' + fop + '.csv'
431 |         per_fop_filename = 'brick_' + stat + '_' + fop + '.csv'
432 |         per_fop_path = join(out_dir_path, per_fop_filename)
433 |         with open(per_fop_path, 'a') as fop_fh:
434 |             hdr = ''
435 |             if pbench_graphs:
436 |                 hdr += 'timestamp_ms, '
437 |             hdr += ','.join(sorted_brick_names)
438 |             hdr += 'all\n'
439 |             fop_fh.write(hdr)
440 |             for i in range(0, len(intervals)):
441 |                 if pbench_graphs:
442 |                     fop_fh.write('%d, ' % gen_timestamp_ms(i))
443 |                 bricks_in_interval = intervals[i]
444 |                 all_bfprofile = zero_bfprofile()
445 |                 columns = []
446 |                 for b in sorted_brick_names:  # for each brick
447 |                     brick = bricks_in_interval[b]
448 |                     try:
449 |                         fop_stats = brick.per_fop[fop + '.' + duration_type]
450 |                     except KeyError:
451 |                         fop_stats = zero_bfprofile()
452 |                     columns.append(fop_stats.field2str(stat, brick.interval))
453 |                     all_bfprofile.accumulate(fop_stats)
454 |                 fop_fh.write('%s\n' % ','.join(columns))
455 | 
456 |                 # collect FOP results across all bricks for later
457 | 
458 |                 all_bfprofile.normalize_sum()
459 |                 if len(vol_fop_intervals) == i:
460 |                     vol_fop_interval = {}
461 |                     vol_fop_intervals.append(vol_fop_interval)
462 |                 else:
463 |                     vol_fop_interval = vol_fop_intervals[i]
464 |                 vol_fop_interval[fop] = all_bfprofile
465 |     return vol_fop_intervals
466 | 
467 | def gen_fop_summary(dir_path, duration_type, stat, vol_fop_intervals):
468 |     #vol_fop_profile_path = join(dir_path, duration_type + '_' + stat + '_allfop.csv')
469 |     vol_fop_profile_path = join(dir_path, 'vol_' + stat + '_allfop.csv')
470 |     with open(vol_fop_profile_path, 'w') as vol_fop_fh:
471 |         if pbench_graphs:
472 |             vol_fop_fh.write('timestamp_ms, ')
473 |         vol_fop_fh.write(','.join(sorted_fop_names))
474 |         vol_fop_fh.write('\n')
475 |         for i in range(0, len(vol_fop_intervals)):
476 |             if pbench_graphs:
477 |                 vol_fop_fh.write('%d, ' % gen_timestamp_ms(i))
478 |             vol_fop_profile_interval = vol_fop_intervals[i]
479 |             if duration_type == 'cumulative':
480 |                 sample_interval = (i + 1) * expected_sample_interval
481 |             else:
482 |                 sample_interval = expected_sample_interval
483 |             columns = []
484 |             for fop in sorted_fop_names:
485 |                 per_vol_fop_profile = vol_fop_profile_interval[fop]
486 |                 columns.append(
487 |                     per_vol_fop_profile.field2str(
488 |                         stat, sample_interval))
489 |             vol_fop_fh.write('%s\n' % ','.join(columns))
490 | 
491 | 
492 | # generate graphs in 
493 | # generate output files in separate directory from
494 | # data structure returned by parse_input
495 | 
496 | next_graph_template='''
497 |     <div class="chart">
498 |       <h3 class="chart-header">%s
499 |         <button id="save1">Save as Image</button>
500 |         <div id="svgdataurl1"></div>
501 |       </h3>
502 |       <svg id="chart%d"></svg>
503 |       <canvas id="canvas1" style="display:none"></canvas>
504 |       <script>
505 |         constructChart("lineChart", %d, "%s", 0.00);
506 |       </script>
507 |     </div>
508 | '''
509 | 
510 | def output_next_graph(graph_fh, gr_index):
511 |     (csv_filename, graph_description) = graph_csvs[gr_index]
512 |     gr_index += 1  # graph numbers start at 1
513 |     graph_fh.write( next_graph_template % (
514 |                     graph_description, gr_index, gr_index, csv_filename))
515 | 
516 | # static content of HTML file
517 | 
518 | header='''
519 | <!DOCTYPE HTML>
520 | <html>
521 |   <head>
522 |     <meta charset="utf-8">
523 |     <link href="static/css/v0.2/nv.d3.css" rel="stylesheet" type="text/css" media="all">
524 |     <link href="static/css/v0.2/pbench_utils.css" rel="stylesheet" type="text/css" media="all">
525 |     <script src="static/js/v0.2/function-bind.js"></script>
526 |     <script src="static/js/v0.2/fastdom.js"></script>
527 |     <script src="static/js/v0.2/d3.js"></script>
528 |     <script src="static/js/v0.2/nv.d3.js"></script>
529 |     <script src="static/js/v0.2/saveSvgAsPng.js"></script>
530 |     <script src="static/js/v0.2/pbench_utils.js"></script>
531 |   </head>
532 |   <body class="with-3d-shadow with-transitions">
533 |     <h2 class="page-header">gluster volume profile - summary graphs</h2>
534 | '''
535 | 
536 | trailer='''
537 |   </body>
538 | </html>
539 | '''
540 | 
541 | 
542 | # generate graphs using header, trailer and graph template
543 | 
544 | def gen_graphs(out_dir_path):
545 |     graph_path = join(out_dir_path, 'gvp-graphs.html')
546 |     with open(graph_path, 'w') as graph_fh:
547 |         graph_fh.write(header)
548 |         for j in range(0, len(graph_csvs)):
549 |             output_next_graph(graph_fh, j)
550 |         graph_fh.write(trailer)
551 | 
552 | 
553 | # make link to where javascript etc lives in unpacked tarball
554 | # ASSUMPTION is that output directory is a subdirectory of where this script
555 | # lives (not a sub-subdirectory).  Sorry but that's the only way to generate a
556 | # softlink that works when we copy the csvdir to a different location.
557 | 
558 | def gen_static_softlink(out_dir_path):
559 |     saved_cwd = os.getcwd()
560 |     static_dir = join(saved_cwd, 'static')
561 |     if not os.path.exists(static_dir):
562 |         print('ERROR: sorry, the javascript directory "static" ' + 
563 |               'needs to be in same directory as this script, trying anyway...')
564 |     os.chdir(out_dir_path)
565 |     os.symlink(join('..', 'static'), 'static')
566 |     os.chdir(saved_cwd)
567 | 
568 | # generate everything needed to view the graphs
569 | 
570 | def generate_output(out_dir_path):
571 | 
572 |     for t in [ 'interval' ]:  # cumulative doesn't work yet
573 |         gen_output_bytes(out_dir_path, t)
574 |         for s in stat_names:
575 |             vol_fop_intvls = gen_per_fop_stats(out_dir_path, t, s)
576 |             gen_fop_summary(out_dir_path, t, s, vol_fop_intvls)
577 | 
578 |     gen_graphs(out_dir_path)
579 |     gen_static_softlink(out_dir_path)
580 | 
581 |     sys.stdout.write('Gluster FOP types seen: ')
582 |     for fop_name in sorted_fop_names:
583 |         sys.stdout.write(' ' + fop_name)
584 |     sys.stdout.write('\n')
585 |     sys.stdout.write('Gluster bricks seen: ')
586 |     for brick_name in sorted_brick_names:
587 |         sys.stdout.write(' ' + brick_name)
588 |     sys.stdout.write('\n')
589 |     print('created Gluster statistics files in directory %s' % out_dir_path)
590 |     print('graphs now available at browser URL file://%s/%s/gvp-graphs.html' \
591 |           % (os.getcwd(), out_dir_path))
592 | 
593 | 
594 | # the main program is kept in a subroutine so that it can run on Windows.
595 | 
596 | def main():
597 |     if len(sys.argv) < 2:
598 |         usage('missing gluster volume profile output log filename parameter'
599 |               )
600 |     fn = sys.argv[1]
601 |     parse_input(fn)
602 |     outdir = make_out_dir(fn)
603 |     generate_output(outdir)
604 | 
605 | main()
606 | 


--------------------------------------------------------------------------------