#!/usr/bin/env python

"""Plot loss against the number of bits from a csv file with bits,loss columns.

Usage: plot_bits.py [input_file]

When no input file is given, 'vw_bits.csv' in the current directory is used.
"""

import sys

import pandas as pd
from matplotlib import pyplot as plt

# An optional first command-line argument selects the csv file to plot;
# the previous hard-coded name stays as the default.
input_file = sys.argv[1] if len(sys.argv) > 1 else 'vw_bits.csv'

d = pd.read_csv(input_file)
plt.plot(d.bits, d.loss)
plt.show()
#!/usr/bin/env python

"""Run vw with various bits settings, save reported losses to a csv file.

subprocess.check_output() version.
"""

import csv
import re
import subprocess
import sys

max_bits = 25
min_bits = 8

path_to_cache = 'data/vw/train.vw.cache'
output_file = 'data/vw_bits_amazon.csv'
vw_params = '--loss_function logistic --passes 20 -q ee --l2 0.0000005'

# The character class also admits signs and an upper-case exponent marker, so
# values printed in scientific notation (e.g. "1.5e-05") are captured too.
LOSS_PATTERN = re.compile(r'average loss = ([-+0-9.eE]+)\n')

###


def get_loss(output):
    """Return the first reported average loss found in *output*, as a string.

    output -- the text captured from a vw run.

    Raises ValueError when *output* contains no "average loss = ..." line,
    instead of failing with an AttributeError on a missing match.
    """
    m = LOSS_PATTERN.search(output)
    if m is None:
        raise ValueError("no 'average loss = ...' line found in vw output")
    return m.group(1)


def open_csv_for_writing(path):
    """Open *path* for csv.writer in the mode the running interpreter needs."""
    if sys.version_info[0] < 3:
        # Python 2's csv module expects a binary-mode file.
        return open(path, 'wb')
    # Python 3's csv module expects a text-mode file opened with newline=''.
    return open(path, 'w', newline='')


###


def main():
    """Run vw for every bit setting from max_bits down to min_bits.

    Each loss is written to output_file as soon as it is known, so partial
    results survive an interrupted run.
    """
    with open_csv_for_writing(output_file) as o_f:
        writer = csv.writer(o_f)
        writer.writerow(['bits', 'loss'])

        for b in range(max_bits, min_bits - 1, -1):

            cmd = 'vw {} --cache_file {} -b {} 2>&1'.format(
                vw_params, path_to_cache, b)
            # Single-argument print( ... ) behaves the same on Python 2 and 3.
            print(cmd)

            # stderr is merged into stdout, tee echoes everything to
            # /dev/stderr so progress stays visible, and stdout is captured.
            # universal_newlines=True makes the captured output text.
            output = subprocess.check_output(
                '{} | tee /dev/stderr'.format(cmd),
                shell=True,
                universal_newlines=True)
            loss = get_loss(output)

            print("\nbits: {}, loss: {}\n".format(b, loss))

            writer.writerow([b, loss])
            o_f.flush()


if __name__ == '__main__':
    main()
#!/usr/bin/env python

"""Run vw with various bits settings, save reported losses to a csv file.

os.system() version.
"""

import csv
import os
import re
import sys

max_bits = 29
min_bits = 8

path_to_cache = 'data/vw/train.cache'
tmp_log_file = 'data/vw/tmp_log.txt'
output_file = 'data/vw_bits_kdd10b.csv'

# Captures everything between "average loss = " and the end of that line.
LOSS_PATTERN = re.compile(r'average loss = (.*?)\n')

###


def get_loss(output):
    """Return the first reported average loss found in *output*, as a string.

    output -- the text of a vw run, here read back from tmp_log_file.

    Raises ValueError when *output* contains no "average loss = ..." line,
    instead of failing with an AttributeError on a missing match.
    """
    m = LOSS_PATTERN.search(output)
    if m is None:
        raise ValueError("no 'average loss = ...' line found in vw output")
    return m.group(1)


def open_csv_for_writing(path):
    """Open *path* for csv.writer in the mode the running interpreter needs."""
    if sys.version_info[0] < 3:
        # Python 2's csv module expects a binary-mode file.
        return open(path, 'wb')
    # Python 3's csv module expects a text-mode file opened with newline=''.
    return open(path, 'w', newline='')


###


def main():
    """Run vw for every bit setting from max_bits down to min_bits.

    Each loss is written to output_file as soon as it is known, so partial
    results survive an interrupted run.
    """
    with open_csv_for_writing(output_file) as o_f:
        writer = csv.writer(o_f)
        writer.writerow(['bits', 'loss'])

        for b in range(max_bits, min_bits - 1, -1):

            # stderr is merged into stdout and tee copies everything both to
            # the terminal and to tmp_log_file, which is read back below.
            cmd = 'vw --loss_function logistic --cache_file {} -b {} 2>&1 | tee {}'.format(
                path_to_cache, b, tmp_log_file)
            os.system(cmd)

            # Close the log promptly instead of leaking one handle per run.
            with open(tmp_log_file, 'r') as log:
                output = log.read()
            loss = get_loss(output)

            # An alternative that avoids the temporary log file is to capture
            # stdout with subprocess.check_output() while tee-ing to
            # /dev/stderr.

            # Single-argument print( ... ) behaves the same on Python 2 and 3.
            print("\nbits: {}, loss: {}\n".format(b, loss))

            writer.writerow([b, loss])
            o_f.flush()


if __name__ == '__main__':
    main()