├── scores.png
├── weight.2015.png
├── 1992-ariel-dmv.png
├── vw-convergence.png
├── hackernews-2016-08-12.png
├── username
├── sort-by-abs
├── weight.2015.csv
├── cgi
│   ├── index.html
│   ├── data2image
│   └── process.cgi
├── train-to-items
├── Licence.md
├── lifestyle-csv2vw
├── HOWTO.md
├── date-weight.r
├── Longevity.md
├── score-chart.r
├── Makefile
├── QandA.md
├── ariel.csv
├── vw-varinfo2
└── README.md
/scores.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arielf/weight-loss/HEAD/scores.png
--------------------------------------------------------------------------------
/weight.2015.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arielf/weight-loss/HEAD/weight.2015.png
--------------------------------------------------------------------------------
/1992-ariel-dmv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arielf/weight-loss/HEAD/1992-ariel-dmv.png
--------------------------------------------------------------------------------
/vw-convergence.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arielf/weight-loss/HEAD/vw-convergence.png
--------------------------------------------------------------------------------
/hackernews-2016-08-12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arielf/weight-loss/HEAD/hackernews-2016-08-12.png
--------------------------------------------------------------------------------
/username:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | $user = $ENV{'USER'} || $ENV{'LOGNAME'} || getlogin || (getpwuid($>))[0];
4 |
5 | printf "%s\n", $user;
6 |
7 |
--------------------------------------------------------------------------------
/sort-by-abs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl -w
2 | # vim: sw=4 ts=4
3 | #
4 |
5 | #
6 | # Convert every line to a pair: absolute-value, as-is-line
7 | #
8 | my @LinesInMem = ();
9 |
10 | while (<>) {
11 | my $first_field = (split(' ', $_))[0] // 0;  # 0 guards against empty lines
12 | push(@LinesInMem, [abs($first_field), $_]);
13 | }
14 |
15 | #
16 | # In the end print original lines in descending order of the
17 | # absolute value
18 | #
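# Example: given input lines "0.5 | a" and "-2 | b", the output is
# "-2 | b" first, then "0.5 | a" (descending by |first-field|).
#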
19 | print map $_->[1], sort {
20 | $b->[0] <=> $a->[0]
21 | } @LinesInMem;
22 |
23 |
--------------------------------------------------------------------------------
/weight.2015.csv:
--------------------------------------------------------------------------------
1 | Date,Pounds
2 | 2015-02-10,194.0
3 | 2015-10-10,187.0
4 | 2016-01-10,184.0
5 | 2016-04-10,182.0
6 | 2016-05-10,180.0
7 | 2016-05-20,178.0
8 | 2016-05-22,178.6
9 | 2016-05-24,177.8
10 | 2016-05-26,180.0
11 | 2016-05-29,180.0
12 | 2016-05-30,179.0
13 | 2016-05-31,177.6
14 | 2016-06-02,178.4
15 | 2016-06-02,177.6
16 | 2016-06-10,178.0
17 | 2016-06-16,177.4
18 | 2016-06-21,176.4
19 | 2016-06-22,177.0
20 | 2016-06-23,176.6
21 | 2016-06-25,176.0
22 | 2016-06-27,175.6
23 | 2016-06-28,175.4
24 | 2016-06-30,175.8
25 | 2016-07-01,175.8
26 | 2016-07-02,175.4
27 | 2016-07-03,175.6
28 | 2016-07-04,174.6
29 | 2016-07-07,175.2
30 | 2016-07-08,174.6
31 | 2016-07-09,174.0
32 |
--------------------------------------------------------------------------------
/cgi/index.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 | <head>
 3 | <title>File Upload</title>
 4 | </head>
 5 | <body>
 6 | <!-- minimal reconstructed form; field names match process.cgi -->
 7 | <form action="process.cgi" method="post" enctype="multipart/form-data">
 8 | Data file (CSV): <input type="file" name="data">
 9 | <br>
10 | Email address: <input type="text" name="email_address">
11 | <br>
12 | <input type="submit" value="Upload">
13 | </form>
14 | </body>
15 | </html>
--------------------------------------------------------------------------------
/train-to-items:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl -w
2 | # vim: ts=4 sw=4 expandtab
3 | #
4 | # Convert a regular train file (one day, many items per line)
5 | # to a one-line-per-item train file, so we can easily calculate
6 | # confidence intervals for each element separately by predicting.
7 | #
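# Example (hypothetical input): a train line such as
#     -0.40 | sleep:0.5 salad cheese
# produces one test line per item (weights stripped, items sorted):
#      'cheese| cheese
#      'salad| salad
#      'sleep| sleep
#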
8 | my %Items = ();
9 |
10 | # Collect items
11 | while (<>) {
12 | # remove label, leave items (input features) only
13 | s/^.*\|\s*//;
14 | # Loop on all items
15 | while (/(\S+)/g) {
16 | my $item = $1;
17 | # Remove (optional) weights if any
18 | $item =~ s/:.*$//;
19 | $Items{$item} = 1;
20 | }
21 | }
22 |
23 | #
24 | # Print items, one per-line (test-file for prediction)
25 | #
26 | for my $item (sort keys %Items) {
27 | printf " '%s| %s\n", $item, $item;
28 | }
29 |
30 |
--------------------------------------------------------------------------------
/cgi/data2image:
--------------------------------------------------------------------------------
1 | #!/bin/bash -e
2 | unset PATH
3 | export PATH="/bin:/usr/bin:$PWD/..:$PWD/../.."
4 |
5 | fatal() {
6 | echo "$@" 1>&2
7 | exit 1
8 | }
9 |
10 | usage() {
11 | fatal "Usage: $0 "
12 | }
13 |
14 | link_up() {
15 | for f in "$@"; do
16 | for d in .. ../.. ../../..; do
17 | if [[ -f "$d/$f" ]]; then
18 | if [[ ! -e "$f" ]]; then
19 | ln -s "$d/$f" .
20 | fi
21 | break
22 | fi
23 | done
24 | done
25 | }
26 |
27 | case "$#" in
28 | (1) data_file="$1"
29 | train_file="${data_file}.train"
30 | ;;
31 | (*) usage ;;
32 | esac
33 |
34 | #
35 | # -- Use already written make/rules to generate the image
36 | #
37 | NAME=$(basename $(/bin/tempfile -p _zZ))
38 |
39 | #
40 | # -- delayed cleanup of the $NAME directory
41 | #
42 | find . -type d -name '_zZ*' -mmin +60 | xargs -r /bin/rm -rf &
43 |
44 | mkdir "$NAME"
45 | /bin/mv "$data_file" "$NAME/data.csv"
46 | cd "$NAME"
47 |
48 | link_up Makefile sort-by-abs lifestyle-csv2vw score-chart.r vw vw-varinfo2
49 |
50 | make NAME=data score-chart
51 |
52 | echo $NAME/data.scores.png
53 |
54 |
--------------------------------------------------------------------------------
/Licence.md:
--------------------------------------------------------------------------------
1 | ### BSD 2-Clause License
2 |
3 | OWNER = Ariel Faigon
4 | YEAR = 2016
5 |
6 | In the original BSD license, both occurrences of the phrase
7 | "COPYRIGHT HOLDERS AND CONTRIBUTORS" in the disclaimer read "REGENTS
8 | AND CONTRIBUTORS".
9 |
10 | Here is the license template:
11 |
12 | Copyright (c) 2012-2016, Ariel Faigon
13 | All rights reserved.
14 |
15 | Redistribution and use in source and binary forms, with or without
16 | modification, are permitted provided that the following conditions
17 | are met:
18 |
19 | 1. Redistributions of source code must retain the above copyright
20 | notice, this list of conditions and the following disclaimer.
21 |
22 | 2. Redistributions in binary form must reproduce the above copyright
23 | notice, this list of conditions and the following disclaimer in
24 | the documentation and/or other materials provided with the
25 | distribution.
26 |
27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 | COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 | POSSIBILITY OF SUCH DAMAGE.
39 |
40 |
41 |
--------------------------------------------------------------------------------
/lifestyle-csv2vw:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl -w
2 | #
3 | # Generate a VW training-set from our raw data CSV
4 | #
5 | use Scalar::Util qw(looks_like_number);
6 |
7 | my $SepPat = qr{(?:\s*,\s*|\s+)};
8 | my $Interactive = -t STDOUT;
9 |
10 | # Default days-window
11 | my $NDays = 1;
12 |
13 | sub process_args() {
14 | for my $arg (@ARGV) {
15 | if (! -f $arg and $arg =~ /^\d+$/) {
16 | $NDays = $arg;
17 | } else {
18 | push(@files, $arg);
19 | }
20 | }
21 | @ARGV = @files;
22 | }
23 |
24 | sub process_input() {
25 | my $prev_weight;
26 | my @daily = ();
27 |
28 | while (<>) {
29 | # Skip comments or header-line
30 | next if (/^[#A-Za-z]/);
31 |
32 | chomp;
33 | # Windows line endings, just in case...
34 | tr/\015//d;
35 |
36 | my ($date, $weight, @factors) = split($SepPat);
37 |
38 | next unless ((defined $date) and $date =~ /^\d/);
39 |
40 | # Only generate a training set if everything is defined and
41 | # we have a prior day weight to compare to
42 | unless ((defined $weight) and looks_like_number($weight)) {
43 | $weight = '' unless (defined $weight);
44 | if ($Interactive) {
45 | warn "$ARGV:$. weight: '$weight' is not a number - line ignored\n";
46 | }
47 | undef $prev_weight;
48 | next;
49 | }
50 |
51 |
52 | #
53 | # -- collect daily (gain + factors) data points in @daily
54 | #
55 | if ((defined $prev_weight) && scalar(@factors) > 0) {
56 | my $gain = $weight - $prev_weight;
57 | my @day_list = ($gain, @factors);
58 | push(@daily, \@day_list);
59 | }
60 | $prev_weight = $weight;
61 | }
62 |
63 | #
64 | # Output vw training-set
65 | #
66 | for (my $i = 0; $i < @daily; $i++) {
67 | my $start = $i;
68 | my $end = $start + $NDays - 1;
69 | my $sum_gain = 0.0;
70 | my @sum_factors = ();
71 | for (my $j = $i; $j <= $end && $j < @daily; $j++) {
72 | #
73 | # -- Aggregate consecutive days gains and factors - up to $NDays
74 | #
75 | my $day_list = $daily[$j];
76 | my @gain_factors = @$day_list;
77 | my ($gain, @factors) = @gain_factors;
78 | $sum_gain += $gain;
79 | push(@sum_factors, @factors);
80 | printf "%.2f | %s\n", $sum_gain, "@sum_factors";
81 | }
82 | }
83 | }
84 |
85 | #
86 | # -- main
87 | #
88 | process_args();
89 | process_input();
90 |
91 |
--------------------------------------------------------------------------------
/HOWTO.md:
--------------------------------------------------------------------------------
1 | # How to run this code
2 |
3 | ------------------
4 | ## Prerequisites:
5 |
6 | This code depends on:
7 |
8 | >- vowpal wabbit (aka vw)
9 | >- R
10 | >- ggplot2 (an R library to create charts)
11 | >- GNU make
12 | >- git (to clone this repository)
13 | >- bash, perl, and python (these are usually preinstalled and available on all Linux and MacOS systems)
14 |
15 | ------------------
16 | ## Installation of prerequisites:
17 |
18 | #### Linux: Ubuntu, Mint, or any Debian derivative
19 |
20 | > sudo apt-get install make vowpal-wabbit r-base r-base-core r-cran-ggplot2 git
21 |
22 | #### Other Linux systems
23 |
24 | Packages are usually named differently.
25 | Contributions to this section are very welcome.
26 |
27 | #### MacOS / OS-X
28 |
29 | Use `brew` to install the above packages.
30 | Contributions to this section are very welcome.
31 |
32 | #### Windows
33 |
34 | The only sane way to run this code in a Windows environment is to run Ubuntu Linux in a VM (virtual machine) inside Windows, and use the Ubuntu instructions in the VM.
35 |
36 | For instructions on how to set up a VM on Windows, see:
37 | > - http://www.howtogeek.com/170870/5-ways-to-run-linux-software-on-windows/
38 | > - https://www.youtube.com/watch?v=uzhA5p-EzqY
39 |
40 | Once you have Ubuntu running on Windows, install all the prerequisites inside it, e.g. in a terminal:
41 |
42 | > sudo apt-get install make r-base r-base-core r-cran-ggplot2 vowpal-wabbit git
43 |
44 | Then follow the "Running the code" section below to run everything from start to finish.
45 |
46 |
47 | ------------------
48 | ## Running the code
49 |
50 | Using git, you clone this repository:
51 |
52 | > git clone https://github.com/arielf/weight-loss
53 |
54 | And change directory to it:
55 |
56 | > cd weight-loss
57 |
58 | Finally type:
59 |
60 | > make
61 |
62 | It should produce a file `scores.txt` with your weight-loss scores.
63 |
64 | To get a chart of the scores:
65 |
66 | > make sc
67 |
68 | or
69 |
70 | > make score-chart
71 |
72 | ------------------
73 | ## Changing `make` parameters
74 |
75 | There are a few adjustable variables (with reasonable defaults) in the
76 | `Makefile`, which you may change if interested.
77 |
78 | To change these, you may call `make` with arguments overriding the
79 | default values, like this:
80 |
81 | > make VarName1=Value1 VarName2=Value2 ...
82 |
83 | The current variables and their default settings are:
84 |
85 | BS = 7 # -- bootstrapping rounds
86 | P = 4 # -- multiple passes over the data
87 | L = 0.05 # -- learning rate
88 | L2 = 1.85201e-08 # -- L2 regularization
89 | NDAYS = 3 # -- Aggregate consecutive daily-data up to NDAYS days
90 |
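For example, to use more bootstrapping rounds and a longer aggregation
window (the values here are just an illustration):

> make BS=11 NDAYS=5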
91 |
92 |
--------------------------------------------------------------------------------
/cgi/process.cgi:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl -w
2 |
3 | use strict;
4 | use CGI;
5 | use CGI::Carp qw(fatalsToBrowser);
6 | use File::Basename;
7 | $ENV{'PATH'} = '/bin:/usr/bin:.';
8 |
9 | use constant IS_MOD_PERL => exists $ENV{'MOD_PERL'};
10 | use constant IS_CGI => IS_MOD_PERL || exists $ENV{'GATEWAY_INTERFACE'};
11 |
12 | my $Q = {};
13 | my $Filename = '';
14 | my $Email = '';
15 | my $UploadDir = 'Uploads';
16 | my $UploadFH;
17 |
18 | if (IS_CGI) {
19 | open(STDERR, ">&STDOUT");
20 | $| = 1;
21 | $CGI::POST_MAX = 1024 * 5000;
22 | $Q = new CGI;
23 | $Filename = $Q->param("data");
24 | $Email = $Q->param("email_address");
25 | $UploadFH = $Q->upload("data");
26 | print $Q->header();
27 | print $Q->start_html(-title => "Weight-Loss Data", -bgcolor => "#ffffff");
28 | } else {
29 | $Filename = 'data.csv';
30 | $UploadFH = \*STDIN;
31 | };
32 |
33 |
34 | sub cgi_die(@) {
35 | die "@_\n";
36 | }
37 |
38 | sub upload($) {
39 | my ($filename) = @_;
40 | unless ((defined $filename) and $filename =~ /\w/) {
41 | cgi_die("You must provide a filename to upload\n");
42 | }
43 | $filename =~ s,.*/,,; # strip leading dirs
44 | $filename =~ tr/ /_/;
45 | $filename =~ tr/A-Za-z0-9_.-/_/cs;
46 |
47 | my $store_path = "$filename";
48 | open(my $store_fh, ">$store_path") or die "$0: open(>$store_path): $!\n";
49 | binmode $store_fh;
50 |
51 | my $chunk;
52 | while ($chunk = <$UploadFH>) {
53 | print $store_fh $chunk;
54 | }
55 | close $store_fh;
56 | $store_path;
57 | }
58 |
59 | sub file2str($) {
60 | my ($file) = @_;
61 | local $/; undef $/;
62 | open(my $fh, $file) || cgi_die("$0: $file: $!\n");
63 | my $str = <$fh>;
64 | close $fh;
65 | $str;
66 | }
67 |
68 | sub data2chart($) {
69 | my ($data_file) = @_;
70 | my $output = `../data2image "$data_file"`;
71 | my ($tmp_dir) = ($output =~ m{^(_z[^/]+)/data.scores.png$}m);
72 | cgi_die("Something failed - Sorry\n")
73 | unless (defined $tmp_dir);
74 | $tmp_dir;
75 | }
76 |
77 | sub body_content($$) {
78 | my ($scores_txt, $chart_path) = @_;
79 | qq[
80 | <!-- minimal reconstructed markup -->
81 | <h1>Your Scores</h1>
82 | <pre>
83 | $scores_txt
84 | </pre>
85 | <h1>Your Chart (click to enlarge)</h1>
86 | <a href="$chart_path"><img src="$chart_path"></a>
87 |
88 |
89 | ]
90 | }
91 |
92 | sub generate_result_page($) {
93 | my ($tmp_dir) = @_;
94 |
95 | my $chart_path = "Uploads/$tmp_dir/data.scores.png";
96 | my $scores_path = "$tmp_dir/scores.txt";
97 | my $scores_txt = file2str($scores_path);
98 | print body_content($scores_txt, $chart_path);
99 | print $Q->end_html();
100 | }
101 |
102 | #
103 | # -- main
104 | #
105 | chdir($UploadDir) || cgi_die("chdir($UploadDir): $!\n");
106 |
107 | my $data_path = upload($Filename);
108 | my $tmp_dir = data2chart($data_path);
109 | generate_result_page($tmp_dir);
110 |
111 |
--------------------------------------------------------------------------------
/date-weight.r:
--------------------------------------------------------------------------------
1 | #!/usr/bin/Rscript --vanilla
2 | #
3 | # Generate date vs. weight chart
4 | #
5 | eprintf <- function(...) cat(sprintf(...), sep='', file=stderr())
6 |
7 | library(ggplot2)
8 | library(scales) # for date_breaks()
9 |
10 | MaxMonths=20
11 | MaxDays=ceiling(MaxMonths*30.4375)
12 |
13 | # --- styles
14 | ratio = 1.61803398875
15 | W = 6
16 | H = W / ratio
17 | DPI = 200
18 | FONTSIZE = 8
19 | MyGray = 'grey50'
20 |
21 | title.theme = element_text(family="FreeSans", face="bold.italic",
22 | size=FONTSIZE)
23 | x.title.theme = element_text(family="FreeSans", face="bold.italic",
24 | size=FONTSIZE-1, vjust=-0.1)
25 | y.title.theme = element_text(family="FreeSans", face="bold.italic",
26 | size=FONTSIZE-1, angle=90, vjust=0.2)
27 | x.axis.theme = element_text(family="FreeSans", face="bold",
28 | size=FONTSIZE-2, colour=MyGray)
29 | y.axis.theme = element_text(family="FreeSans", face="bold",
30 | size=FONTSIZE-2, colour=MyGray)
31 | legend.theme = element_text(family="FreeSans", face="bold.italic",
32 | size=FONTSIZE-1, colour="black")
33 |
34 |
35 | Params <- list()
36 | process.args <- function() {
37 | argv <- commandArgs(trailingOnly = TRUE)
38 | fileArgs <- c()
39 | for (arg in argv) {
40 | # Arguments can be either:
41 | # Params: name=value
42 | # or:
43 | # Files: file arguments
44 | # eprintf("arg: %s\n", arg)
45 | var.val <- unlist(strsplit(arg, '='))
46 | if (length(var.val) == 2) {
47 | var <- var.val[1]
48 | val <- var.val[2]
49 | Params[[var]] <<- val
50 | # eprintf('Params$%s=%s\n', var, val)
51 | } else {
52 | fileArgs <- c(fileArgs, arg)
53 | }
54 | }
55 | # for (n in names(Params)) {
56 | # eprintf("Params[[%s]]: %s\n", n, Params[[n]]);
57 | # }
58 | # Params are assigned to global array Params[]
59 | # rest are returned as files
60 | fileArgs
61 | }
62 |
63 | # --- main
64 | FileArgs <- process.args()
65 | CsvFile <- ifelse(
66 | length(FileArgs) > 0 && nchar(FileArgs[1]) > 0,
67 | FileArgs[1],
68 | 'weight.2015.csv'
69 | )
70 | PngFile <- ifelse(
71 | length(FileArgs) > 1 && nchar(FileArgs[2]) > 0,
72 | FileArgs[2],
73 | gsub(CsvFile, pattern='\\.[tc]sv$', replacement='.png')
74 | )
75 |
76 | Title <- ifelse(length(Params$title),
77 | Params$title,
78 | 'weight by date'
79 | )
80 |
81 | Xlab <- Params$xlab
82 | Ylab <- ifelse(length(Params$ylab),
83 | Params$ylab,
84 | 'Lb'
85 | )
86 |
87 | d <- read.csv(CsvFile, h=T, colClasses=c('character', 'numeric'))
88 |
89 | # Trim data to MaxDays
90 | N <- nrow(d)
91 | if (N > MaxDays) {
92 | d <- d[(N-MaxDays):N, ]
93 | }
94 |
95 | g <- ggplot(data=d, aes(x=as.POSIXct(Date), y=Pounds)) +
96 | scale_y_continuous(breaks=150:195) +
97 | scale_x_datetime(breaks = date_breaks("2 months"),
98 | labels = date_format("%Y\n%b")) +
99 | geom_line(aes(y=Pounds), size=0.3, col='#0077ff') +
100 | geom_point(aes(y=Pounds), pch=20, size=0.8) +
101 | ggtitle(Title) +
102 | ylab(Ylab) + xlab(Xlab) +
103 | theme(
104 | plot.title=title.theme,
105 | axis.title.y=y.title.theme,
106 | axis.title.x=x.title.theme,
107 | axis.text.x=x.axis.theme,
108 | axis.text.y=y.axis.theme
109 | )
110 |
111 | ggsave(g, file=PngFile, width=W, height=H, dpi=DPI)
112 |
113 |
--------------------------------------------------------------------------------
/Longevity.md:
--------------------------------------------------------------------------------
1 | Longevity: Lifestyle factors
2 | ============================
3 |
4 | ### _The following is a collection of factors affecting longevity_
5 |
6 | What better source can you have for learning what leads to a longer
7 | life than asking the longest-living people themselves?
8 |
9 | My main source is the [wikipedia page about verified longest living people](https://en.wikipedia.org/wiki/List_of_the_verified_oldest_people)
10 |
11 | Here they are, in their own opinions/words.
12 |
13 | #### [Jeanne Calment](https://en.wikipedia.org/wiki/Jeanne_Calment) (122 years, 164 days)
14 |
15 | Calment ascribed her longevity and relatively youthful appearance
16 | for her age to a diet rich in *olive oil* (which she also rubbed
17 | onto her skin), to port wine, and to eating nearly one
18 | kilogram (2.2 lb) of chocolate every week. She also credited her
19 | calmness, saying, "That's why they call me Calment." Calment
20 | reportedly remained mentally intact until her very end.
21 |
22 | #### [Sarah Knauss](https://en.wikipedia.org/wiki/Sarah_Knauss) (119 years, 97 days)
23 |
24 | Her daughter, Kathryn Sullivan, aged 96 at the time, opined that
25 | Knauss was "a very tranquil person and nothing fazes her."
26 |
27 | #### [Misao Okawa](https://en.wikipedia.org/wiki/Misao_Okawa) (117 years, 27 days)
28 |
29 | Okawa said that "sushi and *sleep*" were the reasons why she lived so long.
30 |
31 |
32 | #### [Maria Capovilla](https://en.wikipedia.org/wiki/Mar%C3%ADa_Capovilla) (116 years, 347 days)
33 |
34 | She never smoked or drank hard liquor.
35 |
36 | #### [Susannah Mushatt Jones](https://en.wikipedia.org/wiki/Susannah_Mushatt_Jones) (116 years, 311 days)
37 |
38 | She only took high-blood pressure medication and a multivitamin.
39 | Jones never smoked or consumed alcohol. She *slept about ten
40 | hours a night* and napped throughout the day. For breakfast, she
41 | always ate four strips of bacon along with *scrambled eggs* and
42 | grits. She also ate bacon throughout the day.
43 |
44 |
45 | #### [Emma Morano](https://en.wikipedia.org/wiki/Emma_Morano) (116 years, 277 days)
46 | When asked about the secret of her longevity, she said that she had
47 | never used drugs, eats *three eggs a day*, drinks a glass of homemade
48 | brandy, and enjoys a chocolate sometimes, but, above all, she thinks
49 | positively about the future.
50 |
51 | Elsewhere: Morano credits her long life to her diet of raw eggs
52 | and being single.
53 |
54 |
55 | #### [Gertrude Weaver](https://en.wikipedia.org/wiki/Gertrude_Weaver) (116 years, 276 days)
56 | Weaver told the Associated Press that there were three factors that
57 | have contributed to her longevity: "Trusting in the Lord, hard work
58 | and loving everybody." Weaver added a fourth factor when she
59 | told Time magazine that trying to do your best is another factor
60 | adding: "Just do what you can, and if we can't, we can't"
61 | or, in other words, "Kindness"
62 |
63 | #### [Besse Cooper](https://en.wikipedia.org/wiki/Besse_Cooper) (116 years, 100 days)
64 |
65 | Cooper reportedly attributed her longevity to "minding her own business"
66 | and *avoiding junk food.*
67 |
68 |
69 | #### [Jeralean Talley](https://en.wikipedia.org/wiki/Jeralean_Talley) (116 years, 25 days)
70 |
71 | She lived by the Golden Rule: "Treat others the way you want to
72 | be treated". She was known in the community for her wisdom and
73 | wit, and she had sometimes been asked for advice. She had previously
74 | advised people to use common sense, saying "I don't have
75 | much education but what little sense I got, I try to use it."
76 |
77 |
78 | #### Coincidence?
79 |
80 | I find it fascinating that so many of my own discoveries of
81 | factors affecting weight-loss also have to do with longevity.
82 |
83 | Being calm (and reducing stress) seems to be an important
84 | factor I appear to have missed. Could it be hidden in
85 | the "stayhome" feature?
86 |
87 | Chocolate appears twice above. I wonder which type of chocolate,
88 | because bittersweet/dark chocolate typically has much less sugar than
89 | milk chocolate.
90 |
91 |
--------------------------------------------------------------------------------
/score-chart.r:
--------------------------------------------------------------------------------
1 | #!/usr/bin/Rscript --vanilla
2 | #
3 | # Generate weight gain/loss factor chart
4 | #
5 | library(ggplot2)
6 |
7 | eprintf <- function(...) cat(sprintf(...), sep='', file=stderr())
8 |
9 | # --- styles
10 | ratio = 1.61803398875
11 | W = 10
12 | H = W * ratio
13 | DPI = 200
14 | FONTSIZE = 8
15 | MyGray = 'grey50'
16 |
17 | # --- Favorite fonts
18 | Family='FreeSans'
19 | Face='bold.italic'
20 |
21 | title.theme = element_text(family=Family, face=Face,
22 | size=FONTSIZE)
23 | x.title.theme = element_text(family=Family, face=Face,
24 | size=FONTSIZE-1, vjust=-0.1)
25 | y.title.theme = element_text(family=Family, face=Face,
26 | size=FONTSIZE-1, angle=90, vjust=0.2)
27 | x.axis.theme = element_text(family=Family, face="bold",
28 | size=FONTSIZE-2, colour=MyGray)
29 | y.axis.theme = element_text(family=Family, face="bold",
30 | size=FONTSIZE-2, colour=MyGray)
31 | legend.theme = element_text(family=Family, face=Face,
32 | size=FONTSIZE-1, colour="black")
33 |
34 | Params <- list()
35 | process.args <- function() {
36 | argv <- commandArgs(trailingOnly = TRUE)
37 | fileArgs <- c()
38 | for (arg in argv) {
39 | # Arguments can be either:
40 | # Params: name=value
41 | # or:
42 | # Files: file arguments
43 | # eprintf("arg: %s\n", arg)
44 | var.val <- unlist(strsplit(arg, '='))
45 | if (length(var.val) == 2) {
46 | var <- var.val[1]
47 | val <- var.val[2]
48 | Params[[var]] <<- val
49 | # eprintf('Params$%s=%s\n', var, val)
50 | } else {
51 | fileArgs <- c(fileArgs, arg)
52 | }
53 | }
54 | # for (n in names(Params)) {
55 | # eprintf("Params[[%s]]: %s\n", n, Params[[n]]);
56 | # }
57 | # Params are assigned to global array Params[]
58 | # rest are returned as files
59 | fileArgs
60 | }
61 |
62 | # --- main
63 | FileArgs <- process.args()
64 | CsvFile <- ifelse(
65 | length(FileArgs) > 0 && nchar(FileArgs[1]) > 0,
66 | FileArgs[1],
67 | 'scores.csv'
68 | )
69 | PngFile <- ifelse(
70 | length(FileArgs) > 1 && nchar(FileArgs[2]) > 0,
71 | FileArgs[2],
72 | gsub(CsvFile, pattern='\\.[tc]sv$', replacement='.png')
73 | )
74 |
75 | Title <- ifelse(length(Params$title),
76 | Params$title,
77 | 'Relative weight-loss factor importance\n(negative/green means causing weight-loss\npositive/red means causing weight-gain)'
78 | )
79 |
80 | # -- Color weight-gains in red and weigh-losses in green for effect
81 | # (this is one uncommon case where a 'positive' quantity is
82 | # actually undesired/negative)
83 | MyGreen = '#00cc00'
84 | MyRed = '#ff0000'
85 |
86 | d <- read.csv(CsvFile, h=T, sep=',', colClasses=c('character', 'numeric'))
87 |
88 | N <- nrow(d)
89 | CrossIdx = which.min(abs(d$RelScore))
90 |
91 | d <- transform(d,
92 | FeatureNo = 1:N,
93 | TextOffset = (ifelse(d$RelScore > 0, -2, +2)),
94 | TextJust = d$RelScore > 0,
95 | FillColor = (ifelse(d$RelScore > 0, MyRed, MyGreen)),
96 | FeatureLabels = sprintf("%s (%.1f%%)", d$FeatureName, d$RelScore)
97 | )
98 |
99 |
100 | g <- ggplot(
101 | data=d,
102 | aes(
103 | x=FeatureNo,
104 | y=RelScore
105 | )
106 | ) +
107 | ylim(-100, 100) +
108 | geom_bar(
109 | stat='identity',
110 | position='identity',
111 | width=0.8,
112 | fill=d$FillColor
113 | ) +
114 | geom_text(label=d$FeatureLabels,
115 | y=d$TextOffset, x=d$FeatureNo,
116 | size=2.0, angle=0, hjust=d$TextJust) +
117 | ggtitle(Title) +
118 | ylab('Relative Importance (%)') +
119 | xlab(NULL) +
120 | annotate("text", x=CrossIdx+20, y=+35, label='Weight\nGain',
121 | angle=0, colour=MyRed, size=9,
122 | family=Family, fontface=Face) +
123 | annotate("text", x=CrossIdx-20, y=-35, label='Weight\nLoss',
124 | angle=0, colour=MyGreen, size=9,
125 | family=Family, fontface=Face) +
126 | coord_flip() +
127 | theme(
128 | plot.title=title.theme,
129 | axis.title.y=y.title.theme,
130 | axis.title.x=x.axis.theme,
131 | axis.text.x=x.axis.theme,
132 | axis.text.y=element_blank()
133 | )
134 |
135 | ggsave(g, file=PngFile, width=W, height=H, dpi=DPI)
136 |
137 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | #
2 | # Makefile for diet and weight-loss monitoring
3 | # vim: ts=8 sw=8 noexpandtab nosmarttab
4 | #
5 | # Goal:
6 | # - Find out which lifestyle factors affect your weight the most
7 | # - Find out which foods make you gain or lose weight
8 | # - Find confidence-level (ranges) for each food/lifestyle item
9 | #
10 | # Requires you to:
11 | # - Weigh yourself once a day
12 | # - Record what you do/eat daily
13 | #
14 | # How to run this code:
15 | # - Install vowpal-wabbit (vw)
16 | # - Clone this repo: https://github.com/arielf/weight-loss
17 | # - Place your data in <username>.csv
18 | # - Type 'make'
19 | #
20 | # Additional 'make' targets (make <target>):
21 | #
22 | # c/charts
23 | # Creates optional charts
24 | #
25 | # sc
26 | # Creates the per-item scores chart only
27 | #
28 | # m/model
29 | # Creates a model file from the daily train-file
30 | #
31 | # t/train
32 | # Creates the daily-delta (weight change target) train file
33 | #
34 | # i/items
35 | # Creates a 'by single-item' train file. This is a "pretend"
36 | # data-file, as if we only had one item per day, to see what
37 | # its "pretend-isolated" effect is, assuming everything else is equal.
38 | #
39 | # conf/confidence/r/range
40 | # Generates a sorted *.range file, in which each item appears
41 | # together with its 'confidence range' [min max]. This can
42 | # help you figure out how certain we are for each variable.
43 | # e.g. a line like this:
44 | # -0.024568 carrot -0.071207 0.026108
45 | # means based on the given data, the machine-learning process
46 | # estimates carrot makes you lose a bit of weight
47 | # (average is a negative: -0.024568) but the confidence
48 | # daily range is from -0.071207 (loss) to 0.026108 (gain)
49 | # so there's a low confidence in this result.
50 | #
51 | # conv
52 | # Generates a convergence chart of the learning process
53 | #
54 | # clean
55 | # Cleans up generated files
56 | #
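# Typical session (after placing your data in <username>.csv):
#
#   make        # compute importance scores into scores.txt
#   make sc     # draw the per-item score chart
#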
57 | PATH := $(PATH):.
58 | NAME = $(shell ./username)
59 |
60 | # -- scripts/programs
61 | VW = vw
62 | TOVW := lifestyle-csv2vw
63 | VARINFO := vw-varinfo2
64 | SORTABS := sort-by-abs
65 |
66 | # Adjustable parameters: to change, call 'make' with Var=Value:
67 | # --bootstrap rounds:
68 | BS = 7
69 | # --passes:
70 | P = 4
71 | # -- learning rate
72 | L = 0.05
73 | # L2 regularization
74 | L2 = 1.85201e-08
75 |
76 | # Aggregate consecutive daily-data up to this number of days
77 | NDAYS = 3
78 |
79 | #
80 | # vowpal-wabbit training args
81 | #
82 | VW_ARGS = \
83 | -k \
84 | --loss_function quantile \
85 | --progress 1 \
86 | --bootstrap $(BS) \
87 | -l $(L) \
88 | --l2 $(L2) \
89 | -c --passes $(P)
90 |
91 | # -- Commented out random shuffling methods
92 | # now sorting examples by abs(delta).
93 | # Overfitting is countered (though not completely avoided) by:
94 | # * Aggregating on multiple partly overlapping N-day periods
95 | # * Bootstrapping each example (multiple times) via --bootstrap
96 | #
97 | # Multiple orders via shuffling and averaging results should be
98 | # considered as a future option.
99 | #
100 | # SHUFFLE := shuf
101 | # SHUFFLE := unsort --seed $(SEED)
102 | #
103 |
104 | # -- data files
105 | MASTERDATA = $(NAME).csv
106 | TRAINFILE = $(NAME).train
107 | ITEMFILE = $(NAME).items
108 | MODELFILE = $(NAME).model
109 | RANGEFILE = $(NAME).range
110 | DWCSV := weight.2015.csv
111 | DWPNG := $(NAME).weight.png
112 | SCPNG := $(NAME).scores.png
113 |
114 | .PRECIOUS: Makefile $(MASTERDATA) $(TOVW)
115 |
116 | #
117 | # -- rules
118 | #
119 | all:: score
120 |
121 | s score scores.txt: $(TRAINFILE)
122 | $(VARINFO) $(VW_ARGS) -d $(TRAINFILE) | tee scores.txt
123 |
124 | c charts: weight-chart score-chart
125 |
126 | # -- Weight by date chart
127 | wc weight-chart $(DWPNG): date-weight.r $(DWCSV)
128 | Rscript --vanilla date-weight.r $(DWCSV) $(DWPNG)
129 | @echo "=== done: date-weight chart saved in: '$(DWPNG)'"
130 |
131 | # -- Feature importance score chart
132 | sc score-chart $(SCPNG): scores.txt score-chart.r
133 | @perl -ane '$$F[5] =~ tr/%//d ;print "$$F[0],$$F[5]\n"' scores.txt > scores.csv
134 | @Rscript --vanilla score-chart.r scores.csv $(SCPNG)
135 | @echo "=== done: weight-loss factors chart saved in: '$(SCPNG)'"
136 |
137 | # -- model
138 | m model $(MODELFILE): Makefile $(TRAINFILE)
139 | $(VW) $(VW_ARGS) -f $(MODELFILE) -d $(TRAINFILE)
140 |
141 | # -- train-set generation
142 | t train $(TRAINFILE): Makefile $(MASTERDATA) $(TOVW)
143 | $(TOVW) $(NDAYS) $(MASTERDATA) | sort-by-abs > $(TRAINFILE)
144 |
145 | # -- generate 'by single-item' train file
146 | i items $(ITEMFILE): $(TRAINFILE)
147 | train-to-items $(TRAINFILE) > $(ITEMFILE)
148 |
149 | # -- Find daily 'range' for 'per-item'
150 | # This finds a ~90% confidence interval (leverages vw --bootstrap)
151 | conf confidence r range $(RANGEFILE): $(MODELFILE) $(ITEMFILE)
152 | $(VW) --quiet -t -i $(MODELFILE) \
153 | -d $(ITEMFILE) -p /dev/stdout | sort -g > $(RANGEFILE)
154 |
155 | # -- convergence chart
156 | conv: $(TRAINFILE)
157 | $(VW) $(VW_ARGS) -d $(TRAINFILE) 2>&1 | vw-convergence
158 |
159 | clean:
160 | /bin/rm -f $(MODELFILE) $(ITEMFILE) $(RANGEFILE) *.cache* *.tmp*
161 |
162 | # -- more friendly error if original data doesn't exist
163 | $(MASTERDATA):
164 | @echo "=== Sorry: you must provide your data in '$(MASTERDATA)'"
165 | @exit 1
166 |
167 | # commit and push
168 | cp:
169 | git commit . && git push
170 |
171 | # sync gh-pages with master & push
172 | gh:
173 | git checkout gh-pages && \
174 | git merge master && \
175 | git push && \
176 | git checkout master
177 |
178 | #
179 | # Trick for introspection of this Makefile variables from the outside
180 | # (Needs VARNAME=):
181 | #
182 | # Examples:
183 | # $ make VARNAME=MASTERDATA echovar
184 | #
185 | # $ make VARNAME=TRAINFILE ev
186 | #
187 | ev echovar:
188 | @echo $($(VARNAME))
189 |
190 |
--------------------------------------------------------------------------------
/QandA.md:
--------------------------------------------------------------------------------
1 | # Frequently asked Questions, Answers, & Comments
2 |
3 | ------------------
4 |
5 | #### I'm not a coder, this is too difficult for me to use; can you help?
6 |
7 | > I hear you. One reason I put this on github is so others can take it further: write a web or mail service on top, or even a smart-phone app.
8 | >
9 | > Since publishing I've become aware of one site using this work as a back-end. Please note this is very early, experimental, work in progress:
10 | >
11 | > ***[weightbrains.com](http://weightbrains.com) alas, link is now dead***
12 | >
13 | > It would be cool to see more people taking this work further.
14 | >
15 |
16 |
17 | #### This is an example of how to not do scientific research!
18 |
19 | > Indeed, this is not scientific research. Rather, this is:
20 | >
21 | > - Software that can help you make sense of your own data
22 | > - Sharing in hope of further work and improvement
23 | > - A personal story of discovery
24 | > - In the end (judging by results) - a success story
25 | >
26 |
27 |
28 | #### Isn't factor X missing (e.g. Coffee)?
29 |
30 | > Yes, many additional factors are missing. I encourage you to use your own data, and what you personally feel is most important.
31 |
32 |
33 | #### Doesn't dehydration and depleting Glycogen in the liver explain most of this weight loss?
34 |
35 | > For the first 24 hours of a diet, and about 1 Lb of loss (approx. the weight of total body glycogen), yes. However, I can't attribute over 20 Lb of loss over 1 year to just water loss.
36 |
37 | #### Doesn't caloric restriction explain all of this weight loss?
38 |
39 | > This may well be true. I haven't counted calories and a more thorough experiment should add all the data it can use.
40 | >
41 | > However, I tried multiple times to restrict my diet and was not successful. So even if "caloric restriction" is the best explanation, it falls short as "the solution".
42 | >
43 | > What led to success in my case is a combination of physiological and psychological factors. Most importantly, the realization that by trying a LCHF (Low Carb, High Fat) diet, I could sustain a diet regime (possibly calorie restricted, I don't know) that led to a clear and sustainable weight loss.
44 | >
45 | > The story is about the discovery process of what worked for me in the end.
46 |
47 | #### Machine learning: aren't you over-fitting the data?
48 |
49 | > Possibly. The number of days in the data (about 120) may be too small, and my scale has low resolution. There may be other data-entry errors in it as well.
50 | >
51 | > OTOH: I tried many combinations, data-subsets, shuffling, bootstrapping, and while the details varied, the main conclusions were pretty consistent: longer sleep and low-carb, high-fat eating were leading me to lose weight.
52 | >
53 | > More data, and data from many people is always welcome.
54 | >
55 |
56 | #### Machine learning: how much data do I need?
57 |
58 | > The more the better. Just as important: a higher-resolution scale (0.1 lb or less).
59 | >
60 | > To increase the sample size, the most recent version of the software no longer uses each day as a single data point. It augments the data in two ways (see the sketch at the end of this answer):
61 | > - It applies a variable-length sliding window on 1..N consecutive days over the data to auto-generate additional data-points.
62 | > - It applies randomized bootstrapping on each data example. This adds another multiplier towards reducing random noise and variance.
63 | >
64 | > You're welcome to play with the Makefile `NDAYS` (max length of consecutive days of the sliding window) and `BS` (number of random bootstrapping rounds multiplier for each data-set example) parameters to see how much results change, and which part remains relatively stable.
65 | > For example:
66 | >
67 | > make BS=7 NDAYS=5
68 | >
69 | > My conclusions were that while some items in the middle fall into the "too little data to conclude from" category, items closer to the top/bottom, as a group, point to sleeping (fasting) longer and replacing carbs with fat as likely weight-loss factors.
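>
> Below is a minimal sketch of this sliding-window augmentation, in Python purely for illustration (the repo's actual implementation is the Perl script `lifestyle-csv2vw`, and the names here are hypothetical):
>
>     # daily: list of (gain, [factors]) pairs, one per tracked day
>     def sliding_windows(daily, ndays):
>         for i in range(len(daily)):
>             gain_sum, factors = 0.0, []
>             # aggregate windows of length 1..ndays starting at day i
>             for j in range(i, min(i + ndays, len(daily))):
>                 gain, day_factors = daily[j]
>                 gain_sum += gain
>                 factors.extend(day_factors)
>                 # every partial window becomes its own training example
>                 yield gain_sum, list(factors)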
70 |
71 | #### What's the story on statins? How is this related to weight-loss?
72 |
73 | > [Originally appeared in the HackerNews thread; some edits applied]
74 | >
75 | > Here's my personal experience with statins. I may be wrong, but I'm following my compass while always open to be proven otherwise.
76 | >
77 | > Doctor: "Your 'bad cholesterol' is borderline, I want you to start taking 'Lipitor'..."
78 |
79 | > Observation: when the 'Lipitor' patent expired, and it became a cheap generic drug, the suggestion turned into 'Crestor', which I learned has a bit longer effective half-life, and a way higher price.
80 |
81 | > ***Me: (while adopting a different diet)***
82 | > *"Hmm statins would have taken my cholesterol and triglicerids _maybe_ 3% lower and here I am 20% lower after a year of a simple, self-studied, diet change. Maybe there are better ways to lower the so called 'bad cholesterol'?*
83 |
84 | > Further study: there's always a new statin the moment the previous patent expires.
85 | >
86 | > Check out the following names on Wikipedia:
87 | >
88 | > - Compactin
89 | > - Simvastatin
90 | > - Fluvastatin
91 | > - Cerivastatin
92 | > - Atorvastatin
93 | > - Rosuvastatin
94 | >
95 | > These are all chemical names, not brand names; the last two on the list are the ones branded as "Lipitor" and "Crestor".
96 | >
97 | > So I don't know. I'm 100% sure all my doctors are well-meaning and caring and I have nothing against them, but my confidence in such health suggestions, in research funded by big pharma, and in the great new statin du jour while America keeps getting more obese and less healthy, is, how can I put it? A bit shaken.
98 | >
99 | > Again, just prove me wrong, and I'll change my view.
100 |
101 |
102 | #### Can you tell the story of how, and how does it feel to "Go Viral"?
103 |
104 | >
105 | > From my PoV, it was totally accidental.
106 | >
107 | > I was trying to lose weight, and since I'm into ML, I found it natural to try some machine learning to guide me, especially in the early stages. It helped.
108 | >
109 | > When I told this to a colleague of mine, she suggested "You have to share this more widely!" So I dusted it off a bit, cleaned up the code to make it a bit more usable by others, wrote a few pages of background and put it on github.
110 | >
111 | > On github it sat for a few months with zero forks, zero stars, zero watchers, totally ignored by the world :-)
112 | >
113 | > Then some guy named Dmitri noticed it, and posted a link to [Hacker News](http://ycombinator.com/news)
114 | >
115 | > The reaction was overwhelming.
116 | >
117 | > Some minutes later, I got an email from someone else I don't know, Victor, via github, saying "Congratulations, you've just made it to the top of HN..."
118 | >
119 | > It was a Friday evening. That weekend I couldn't sleep. My mailbox was exploding too. It went viral over the "interwebs". Hundreds of comments, thousands of stars on the github repository, many forks, people offering me jobs, invites to present at conferences, you name it.
120 | >
121 | > There were some follow-ups on Reddit, Quora, and some other forums, and it quickly became the top unpaid link on google when searching for a few terms combining weight-loss and machine-learning.
122 | >
123 | > In moments like this, one wishes one had two copies of oneself. I have another life too.
124 | >
125 |
--------------------------------------------------------------------------------
/ariel.csv:
--------------------------------------------------------------------------------
1 | #
2 | # vim: textwidth=0 nowrap
3 | # Diet data file: 2012-06 and on
4 | #
5 | # 'sleep' (one unit) means _at least_ 8 hours of sleep.
6 | #
7 | # Don't add 'sleep' unless you slept more than 8 hours.
8 | # If you slept less than 8 hours, use 'nosleep' instead.
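# (e.g. a 9-hour night is logged as 'sleep', a 6-hour night as 'nosleep')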
9 | #
10 | # To emphasize any factor, i.e. make it more important/pronounced
11 | # in the importance order, you need to _decrease_ its weight.
12 | #
13 | # This is due to the "less quantity drives more effect" principle
14 | # when calculating _importance_ (effect on the target feature).
15 | #
16 | # e.g., if you avoid carbs, and one day you eat just one
17 | # slice of bread, say 0.5 of a normal portion, you could do:
18 | #
19 | # bread:0.5
20 | #
21 | # Which (perhaps unintuitively) means doubling the importance
22 | # of the bread-eating effect, rather than halving it.
23 | #
24 | # IOW: if you eat "more of X", then every unit/gram of X
25 | # has relatively less effect given the weight-gain is the same.
26 | #
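# For example (hypothetical): logging a double portion as bread:2
# makes each unit of bread count for _less_ in the importance ranking.
#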
27 | # The default factor weight (when missing) is 1.0
28 | #
29 | Date,MorningWeight,YesterdayFactors
30 | 2012-06-01,186.8,
31 | 2012-06-10,185.9,
32 | 2012-06-11,182.6,salad sleep:0.15 cheese egg halfnhalf:1.5
33 | 2012-06-12,181.0,sleep:0.5 tea grape
34 | 2012-06-13,183.6,bagel bread date:1.5 dietsnapple splenda pizza nosleep:0.2
35 | 2012-06-14,183.7,coffeecandy cheese rice salmon peanut bread
36 | 2012-06-15,183.4,salmon salad cherry:0.7 dietsnapple tea potato oliveoil cheese peanut
37 | 2012-06-16,183.8,bread avocado:10 sugaryogurt dietsnapple peanut ketchup
38 | 2012-06-17,182.6,salmon pistachio peanut cheese
39 | 2012-06-18,182.4,bread:0 peanut bagel cheese cherry soup
40 | 2012-06-19,182.4,cherry knish cheese salmon pistachio:0.7 peanut
41 | 2012-06-20,183.0,sorbet granolabar peanut:8 pistachio snapple sugaryogurt
42 | 2012-06-21,182.6,cheese salad peanut beans:0.2 soup dietsnapple salmon carrot artichokepie
43 | 2012-06-22,182.0,cheese salad peanut:0.5 meatball tea crab cherry pistachio atkinsbar egg pizza
44 | 2012-06-23,182.8,bread sorbet coffeecandy peanut:0.5 snapple
45 | 2012-06-24,182.4,peanut:0.5 sugaryogurt egg pizza cheese
46 | 2012-06-25,182.2,sugaryogurt peanut:0.5 pistachio:0.5 tea salad egg pizza
47 | 2012-06-26,182.4,snapple rice salad meat bread peanut:0.5 pistachio:0.5 peanuts:0.4 ketchup
48 | 2012-06-27,183.0,indian mango rice snapple danish pistachio:0.7 cheese atkinsbar peanut:0.5 chocolate
49 | 2012-06-28,183.0,egg splenda salmon beans salad dietsnapple rice atkinsbar nectarine:0.7 peanut
50 | 2012-06-29,183.7,egg pizza splenda sugaryogurt peanut:0.5 bagel:0.5 coffeecandy breadedchicken trailmix ketchup
51 | # Break, vacation in NYC, didn't keep track
52 | 2012-07-08,181.8,
53 | 2012-07-09,182.2,bun pistachio peanut:0.7 ketchup quinoa peas pizza egg splenda nosleep:0.5 melon bread:0.4 watermelon:0.7
54 | 2012-07-10,181.2,carrot chicken salad dietsnapple:1 snapple plum mottsfruitsnack cheese sugaryogurt tea oliveoil
55 | 2012-07-11,181.8,bagel:0.7 dietsnapple icecreamsandwich pistachio pizza splenda cocoa
56 | 2012-07-12,180.8,chicken milk beans salad dietsnapple pizza cocoa splenda pistachio peanut cheese tea oliveoil potato
57 | 2012-07-13,180.8,pizza cocoa splenda peanut cheese coffeecandy chicken olives brownie meatball sorbet stayhome
58 | 2012-07-14,181.4,bread dietsnapple pizza cocoa splenda cheese nectarine brownie
59 | 2012-07-15,182.4,nosleep:0.5 caramelizedwalnut friedrice sorbet:0.5 peanut pistachio meatball
60 | 2012-07-16,182.2,frenchtoast sugaryogurt peanut:0.7 egg brownie pistachio coffeecandy:0.7
61 | 2012-07-17,182.0,milk chicken salad tea potato oliveoil grape pistachio peanut pizza cocoa splenda mottsfruitsnack icecream:0.3
62 | 2012-07-18,181.8,gyoza tempura sushi chicken dietsnapple pizza cocoa splenda soup
63 | 2012-07-19,181.0,hotdog potato beans salad coleslaw dietsnapple pizza cocoa splenda icecream peanut atkinsbar tea oliveoil parmesan grape
64 | 2012-07-20,181.2,breadedchicken veggieburger salad thaisoup sorbet:.2 pizza cocoa splenda dietsnapple indian danish beans melon grape trailmix peanut stayhome
65 | 2012-07-21,182.0,caramelizedwalnut peanut soup avocado cheeseburger pizza cocoa splenda beet chocolate coffeecandy pistachio:0.5
66 | 2012-07-22,181.4,salad icecream tea oliveoil soup peanut pizza chocolate coconut
67 | 2012-07-23,180.8,egg pizza splenda sugaryogurt watermelon atkinsbar peanut pistachio cheese potato fajita
68 | 2012-07-24,180.8,salad chicken butter bread:0.5 pizza cocoa dietsnapple splenda peanut sugaryogurt meatball cheese fajita sorbet:0.2
69 | 2012-07-25,180.6,milk chicken salad cheese pizza cocoa splenda dietsnapple sugaryogurt beans peanut mottsfruitsnack
70 | 2012-07-26,180.6,chicken bbqsauce milk coleslaw tea pistachio pizza cocoa splenda salad gummybears coffeecandy:0.5 brownie:.4 dietsnapple
71 | 2012-07-27,181.0,meat bread grilledonion brownie coffeecandy sorbet cheese
72 | 2012-07-28,181.0,peanut granola cookie salad soup splenda cocoa
73 | 2012-07-29,181.4,salad bacon bread croissant icecreamsandwich sorbet breadedchicken bbqsauce cheese whitenectarine peanut pizza frappuccino splenda
74 | 2012-07-30,181.2,pizza splenda whitenectarine tea pistachio peanut sugaryogurt
75 | 2012-07-31,181.4,breadedchicken milk salad peach plum ketchup peanut pistachio cornbread
76 | 2012-08-01,181.6,bagel peanut pizza splenda
77 | 2012-08-02,181.2,chicken milk salad cheese pizza splenda cocoa
78 | 2012-08-03,180.8,pizza splenda peanut sorbet stayhome
79 | 2012-08-04,181.8,bagel beans cheeseburger whitenectarine cheese caramelizedwalnut peanut pistachio tofee coffeecandy pizza cocoa splenda chicken carrot soup pasta dietsnapple bread
80 | 2012-08-05,181.4,pizza meatball salad coffeecandy tea oliveoil grape peanut cheese brownie pizza parmesan potato
81 | 2012-08-06,182.0,coconut chocolate nosleep frappuccino:0.2 pizza peanut pistachio coconutbar
82 | 2012-08-07,183.4,indian danish breadedchicken rice brownie pizza cocoa splenda dietcoke:0.3 pistachio:0.5 peanut:0.7 coffeecandy tofee:0.7 chocolate coconut driedapple
83 | 2012-08-08,183.6,pizza cocoa splenda dietsnapple dietcoke icecream frappuccino:0.2 peanut tofee nectarine salad
84 | 2012-08-09,182.4,pizza cocoa splenda dietsnapple salad tea potato milk tilapia peanut atkinsbar cheese sleep:0.3 exercise:0.45 strawberry
85 | 2012-08-10,183.0,pizza frappuccino:0.2 peanuts tofee:0.7 kettlecorn
86 | 2012-08-11,183.6,breadedshrimp beef thinkthinbar dietsnapple dietcoke
87 | 2012-08-12,183.8,brownrice pizza shrimp whitenectarine cheese atkinsbar breadedchicken chocolate coconut peanut avocado sugaryogurt
88 | 2012-08-13,184.2,schnitzel bagel pizza cocoa splenda dietsnapple salad icecream sugaryogurt peanut
89 | 2012-08-14,183.4,bagel salad cocoa pizza dietsnapple splenda sugaryogurt cheese oliveoil
90 | 2012-08-15,182.4,pizza cocoa bread cheese oliveoil frappuccino:0.2 stayhome sorbet dietsnapple sleep:0.5
91 | 2012-08-17,181.4,sleep stayhome peanut pizza egg whitenectarine smallveggiewraps:0.7
92 | 2012-08-18,181.4,bagel beans peanut dietsnapple salad
93 | 2012-08-19,181.0,sleep stayhome watermelon melon meatball salad whitenectarine pizza
94 | 2012-08-20,181.4,
95 | 2012-08-21,182.0,sushi breadedchicken pizza cocoa dietsnapple pistachio peanut brownie atkinsbar cheese
96 | 2012-08-22,182.0,salad cheese nuggets dietsnapple pistachio pizza frappuccino nectarine ham eggpie sausage
97 | 2012-08-23,181.6,falafel:7 pita tea pizza frappuccino:0.2 pistachio nectarine sugaryogurt stayhome
98 | 2012-08-24,182.0,bread dietsnapple pizza cocoa splenda kettlecorn whitenectarine
99 | 2012-08-25,182.6,meat frenchfries ketchup sorbet peanut salad coffeecandy kettlecorn mottsfruitsnack
100 | 2012-08-26,183.0,frappuccino pizza bagel:0.8 kettlecorn coffeecandy cheese beef
101 | 2012-08-27,182.0,stayhome sleep:0.7 peanut salad whitenectarine pizza frappuccino
102 | 2012-08-28,182.4,nosleep couscous pita chicken salad pizza splenda cocoa dietsnapple
103 | 2012-08-29,181.4,sleep nectarine pizza cocoa cheeseburger cheese peanut
104 | 2012-08-30,180.8,nectarine pizza cocoa milk salad greenbeans chocolate rizo sleep:0.7
105 | 2012-08-31,180.0,sleep:0.5
106 | 2012-09-01,180.9,bread:0.7 pizza cocoa splenda peanut sugaryogurt dietsnapple nectarine cheese coffeecandy:6
107 | 2012-09-02,180.0,sleep stayhome cheese pizza tea grape icecream
108 | 2012-09-03,181.2,peanut bun melon cheese sugarlemonade sugaryogurt pasta
109 | 2012-09-04,,
110 | 2012-09-05,,
111 | 2012-09-05,,
112 | 2012-09-07,180.0,stayhome
113 | 2012-09-08,181.0,bread:2 peanut pizza cheese
114 | 2012-09-09,180.2,sleep stayhome salad bacon pizza frappuccino nuggets cheese
115 | 2012-09-10,,
116 | 2012-09-11,181.2,
117 | 2012-09-12,180.8,
118 | 2012-09-13,180.8,
119 | 2012-09-14,180.4,stayhome salad pistachio pizza egg clementine coffeecandy
120 | 2012-09-15,181.4,falafel giro clementine:0.5 pistachio dietsnapple pizza cocoa frenchfries breadedshrimp icecream coffeecandy
121 | 2012-09-16,181.4,
122 | 2012-09-17,182.0,salad gefiltefish chicken wine beet kettlecorn bread
123 | 2012-09-18,182.2,kettlecorn cheese wine chicken bread
124 | 2012-09-19,182.2,sushi gyoza sweetjapanese cheese wine chicken dietsnapple
125 | 2012-09-20,182.0,indian turmeric rice chicken fried spinach wine dietsnapple pomegranate sugaryogurt
126 | 2012-09-21,181.4,stayhome musaka cheese coffeecandy clementine pizza egg pistachio nectarine:0.5 date
127 | 2012-09-22,182.8,rogalach:0.7 nathans:0.7 hummusroll bread salad tomatosoup cocoa pizza splenda dietsnapple
128 | 2012-09-23,183.4,bread apple hummus pita danish
129 | 2012-09-24,182.4,chicken olives wine peanut sugaryogurt fish stayhome
130 | 2012-09-25,181.0,sleep falafel chicken milk salad olives dietsnapple mottsfruitsnack
131 | 2012-09-26,183.6,mottsfruitsnack:2 bagel bread nosleep:0.5
132 | 2012-09-27,182.5,sleep tea cocoa pizza splenda dietsnapple
133 | 2012-09-28,182.4,salad
134 | 2012-09-29,181.6,sleep grape cheese wine salad thinpretzel
135 | 2012-09-30,183,nosleep bread clementine quesadilla corn
136 | 2012-10-01,182.2,salad tea peanut chocolate stayhome pizza splenda potato wine
137 | 2012-10-02,183.0,wine cookie:0.5 pita hummus peanut bread beef
138 | 2012-10-03,,
139 | 2012-10-04,,
140 | 2012-10-05,,
141 | 2012-10-06,182.4,
142 | 2012-10-07,181.8,sleep wine cheese
143 | 2012-10-08,,nectarine cheese pistachio coffeecandy splenda pizza frappuccino sugaryogurt thinpretzel clementine creamcheese
144 | 2012-10-09,183.8,bread bagel
145 | 2012-10-10,183.2,
146 | 2012-10-11,183.6,nosleep rice pistachio peanut greenbeans bread cheese
147 | 2012-10-12,,
148 | 2012-10-13,183.6,
149 | 2012-10-14,184.2,nosleep bread beef
150 | 2012-10-15,182.6,salad sleep bacon egg splenda icecream
151 | 2012-10-16,184.7,bread bagel beef
152 | 2012-10-17,183.5,tea egg oliveoil parmesan:2
153 | 2012-10-18,182.2,earthbalance:0.7 tea havarti oliveoil bacon parmesan
154 | 2012-10-19,,
155 | 2012-10-20,,
156 |
157 |
--------------------------------------------------------------------------------
/vw-varinfo2:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # vim: ts=4 sw=4 expandtab
4 | """
5 | vw-varinfo2: vw dataset summary & variable importance
6 |
7 | This is a new & simpler implementation of the original vw-varinfo.
8 |
9 | It is designed to be simpler and faster. There's less dependence
10 | on command line options so it is much more robust against future
11 | changes and new options in vowpal-wabbit.
12 |
13 | This implementation is in python (original was in perl)
14 |
15 | TODO: multi-class support is not implemented!
16 |
17 | Author: Ariel Faigon (2016)
18 | """
19 | import sys
20 | import os
21 | import subprocess
22 | import re
23 | import itertools
24 | import tempfile
25 | import traceback
26 |
27 | from operator import itemgetter
28 |
29 | Verbose = False
30 | ARGV0 = os.path.basename(sys.argv[0])
31 |
32 | # Default vw executable program to call
33 | VW = 'vw'
34 | # Additional VW args which should be reused from 1st to 2nd pass
35 | VWARGS = []
36 |
37 | # Hash mappings for per-feature (min, max, hash-value, weight)
38 | F_MIN = {}
39 | F_MAX = {}
40 | F_HASH = {}
41 | F_WEIGHT = {}
42 |
43 | # We need to have a model at the end to load all weights
44 | # If it is not supplied on the command line, we add it ourselves
45 | ModelName = ''
46 | CleanupModel = False
47 |
48 | # A global switch and list of all seen labels to support MultiClass
49 | MultiClass = False
50 | MCLabels = None
51 |
52 | def v(msg):
53 | """print message to stderr"""
54 | sys.stderr.write("%s\n" % msg)
55 | sys.stderr.flush()
56 |
57 | def d(msg):
58 | """Verbose/debugging message, activated with '-v' option."""
59 | if not Verbose:
60 | return
61 | v(msg)
62 |
63 | def fatal(msg):
64 | """fatal (can't continue) situation error message"""
65 | v("== FATAL: %s" % msg)
66 | sys.exit(1)
67 |
68 | def usage(msg):
69 | """Print usage message and exit"""
70 | if msg:
71 | v(msg)
72 | v("Usage: %s [-v] [] [] ..." % ARGV0)
73 | v(" Notes:\n"
74 | "\t- You may omit the argument (default is 'vw')\n"
75 | "\t- You may use a different executable as the 1st arg\n"
76 | "\t- are all the vw arguments, as you would call vw directly\n"
77 | "\t- If is just a dataset-file - vw defaults will be used\n"
78 | "\t- To lose the constant (intercept), use vw's '--noconstant' option\n"
79 | "\t However the constant may be useful to show if there's a bias")
80 | sys.exit(1)
81 |
82 |
83 | def which(program):
84 | """
85 | Find a program in $PATH
86 | If found, return its full path, otherwise return None
87 | """
88 | def is_exe(fpath):
89 | """Return True if fpath is executable, False otherwise"""
90 | return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
91 |
92 | fpath, _ = os.path.split(program)
93 | if fpath:
94 | if is_exe(program):
95 | return program
96 | else:
97 | for path in os.environ["PATH"].split(os.pathsep):
98 | path = path.strip('"')
99 | exe_file = os.path.join(path, program)
100 | if is_exe(exe_file):
101 | return exe_file
102 |
103 | return None
104 |
105 |
106 | def all_features_dicts():
107 | """
108 | Returns two dicts of all features in a structured way:
109 | 1st dict is individual features: scalar keys with a value of 1
110 | 2nd dict is for features within name-spaces, key is the name-space
111 | first dict is:
112 | {
113 | # individual (not in a name-space) features:
114 | "f1": 1,
115 | "f2": 1,
116 | "fN": 1
117 | }
118 | second dict is:
119 | {
120 | # features in name-spaces:
121 | "namespace1": { "f1":1, "f2":1, ... },
122 | "namespace2": {"f1":1, "f2":1, ... },
123 | }
124 | """
125 | d1 = {}
126 | d2 = {}
127 | for k in F_HASH:
128 | if '^' in k:
129 | ns, fname = k.split('^', 1)
130 | if ns not in d2:
131 | d2[ns] = {}
132 | d2[ns][fname] = 1
133 | else:
134 | # Constant feature should never be added as a regular
135 | # feature. vw adds it by itself as needed.
136 | # TODO: multiclass uses separate Constant_ per class
137 | if k != 'Constant':
138 | d1[k] = 1
139 |
140 | return d1, d2
141 |
142 |
143 | def all_features_example():
144 | """Return a equal-weight vw line with all features present"""
145 | # TODO: implement multi-class: needs per-class internal data-structs
146 | d1, d2 = all_features_dicts()
147 | individual_features = ' | ' + ' '.join(d1.keys())
148 | ns_features = []
149 | for ns in d2:
150 | fnames = d2[ns].keys()
151 | one_ns_features = " |%s %s" % (ns, ' '.join(fnames))
152 | ns_features.append(one_ns_features)
153 |
154 | example = '1' + individual_features + ' '.join(ns_features) + '\n'
155 | d("all_features_example: %s" % example)
156 | return example
157 |
158 |
159 | def process_audit_line(line):
160 | """
161 | Process an audit line coming from 'vw'
162 | Track min/max/hash-value/weight for each feature
163 | """
164 | features = line.split("\t")
165 | features.pop(0)
166 | for f in features:
167 | fields = f.split(':')
168 | fname = fields[0]
169 |
170 | if fname == '':
171 | # don't process 'empty' features
172 | continue
173 |
174 | fhash = int(fields[1])
175 | fval = float(fields[2])
176 | fweight = float(fields[-1].split('@')[0])
177 |
178 | F_WEIGHT[fname] = fweight
179 | F_HASH[fname] = fhash
180 |
181 | if fname not in F_MIN:
182 | # feature seen for 1st time
183 | F_MIN[fname] = fval
184 | F_MAX[fname] = fval
185 |
186 | if fval < F_MIN[fname]:
187 | F_MIN[fname] = fval
188 | if fval > F_MAX[fname]:
189 | F_MAX[fname] = fval
190 |
191 |
192 | def vw_audit(vw_cmd, our_input=None):
193 | """
194 | Generator for vw audit-lines
195 | (Each example is mapped to its audit-line)
196 |
197 | vw_cmd is a list of args to run vw with (vw command line)
198 |
199 | There are two modes of running:
200 | 1) Normal: input provided directly to vw from command line
201 | 2) 2nd pass: input provided by vw-varinfo as a string
202 | This mode is activated when our_input="some string..."
203 | """
204 | if our_input:
205 | # Input comes from our_input (string)
206 | # which is sent via stdin to the vw subprocess
207 | vw_proc = subprocess.Popen(
208 | vw_cmd,
209 | stdout=subprocess.PIPE,
210 | stderr=subprocess.STDOUT,
211 | stdin=subprocess.PIPE,
212 | bufsize=1048576
213 | )
214 | # python3 expects a bytes-like object
215 | # Hence encoding the string our_input
216 | vw_proc.stdin.write(our_input.encode())
217 | vw_proc.stdin.close()
218 | else:
219 | # By default, vw reads from a training-set
220 | # which is provided on the command line
221 | vw_proc = subprocess.Popen(
222 | vw_cmd,
223 | stdout=subprocess.PIPE,
224 | stderr=subprocess.STDOUT,
225 | close_fds=False,
226 | bufsize=1048576
227 | )
228 |
229 | example_no = 0
230 |
231 | while True:
232 |         # The subprocess output arrives as bytes,
233 |         # so decode it back into a string
234 | vw_line = vw_proc.stdout.readline().decode()
235 | if not vw_line:
236 | # End of input
237 | vw_proc.stdout.close()
238 | vw_proc.wait()
239 | if vw_proc.returncode:
240 | # non-zero exit code, print the full command that
241 | # failed to help user reproduce/understand it
242 | fatal("vw subprocess failed (status=%s):\n\t%s\n"
243 | "(Run the command above to reproduce the failure)" %
244 | (vw_proc.returncode, ' '.join(vw_cmd)))
245 | else:
246 | # everything looks cool, support debugging anyway
247 | d("%s: %s examples, exit status: %s" %
248 | (vw_cmd, example_no, vw_proc.returncode))
249 |
250 | return
251 |
252 | # An audit line is recognized by a leading-tab
253 | if vw_line[0] == '\t':
254 | # An audit line (what we're looking for)
255 | example_no += 1
256 | d(vw_line)
257 | yield vw_line
258 | continue
259 |
262 |
263 | # Q: anything we want to do with other lines?
264 | # A: for now no, we just read the next line from vw
265 |
266 | return
267 |
268 | def run_vw(vw_cmd, our_input=None):
269 | """Track all variables and their weights via vw --audit lines"""
270 | for line in vw_audit(vw_cmd, our_input):
271 | process_audit_line(line)
272 |
273 |
274 | def is_vw_arg(arg):
275 | """
276 | Return True iff the arg looks like a 'vw' argument
277 | Side effect: modifies the VW global variable iff user uses
278 | a different vw
279 | """
280 | global VW
281 | if arg == VW:
282 | return True
283 | if re.search(r'(?:^|[\\/])vw[-_.0-9]*(\.exe)?$', arg):
284 | VW = arg
285 | return True
286 | return False
287 |
288 |
289 | def already_has_audit(args):
290 | """Return True iff args already include --audit (or -a)"""
291 | if '-a' in args or '--audit' in args:
292 | return True
293 | return False
294 |
295 |
296 | def is_multiclass(args):
297 | """
298 | Check args for any hint of a multiclass problem
299 | (Check is option dependent and may be incomplete)
300 | """
301 | # Not sure if --wap, --ect multi-class are actually right
302 | for mc_opt in ('--oaa', '--csoaa', '--ect', '--wap', '--sequence'):
303 | if mc_opt in args:
304 | return True
305 | return False
306 |
307 |
308 | def model_arg(args):
309 | """Return the model arg if any"""
310 | f_idx = None
311 | try:
312 | f_idx = args.index('-f')
313 |     except ValueError:
314 |         # '-f' is not present
315 | return None
316 |
317 | try:
318 | f_idx += 1
319 | model = args[f_idx]
320 |     except IndexError:
321 |         fatal("Oops! -f without an arg - can't continue")
322 |
323 | return model
324 |
325 |
326 | def get_vw_cmd(args):
327 | """
328 | Return the vw command we want to run
329 | This means stripping our own (vw-varinfo) name from the list
330 | and making sure:
331 | 1) That we have 'vw' at the beginning
332 | 2) That -a is added for auditing
333 | """
334 | global ModelName, CleanupModel, Verbose
335 |
336 | if len(args) <= 1:
337 | usage('')
338 |
339 | # -- move ourselves (vw-varinfo arg) out of the way
340 | args.pop(0)
341 |
342 | # 1st arg can be '-v' for debugging this script
343 | if len(args) > 0 and args[0] == '-v':
344 | Verbose = True
345 | args.pop(0)
346 |
347 | vw_args = []
348 |
349 | if len(args) < 1:
350 | usage('Too few args: %s' % args)
351 |
352 | if not is_vw_arg(args[0]):
353 | if os.name == 'nt':
354 | args.insert(0, 'vw.exe')
355 | else:
356 | args.insert(0, 'vw')
357 |
358 | if not already_has_audit(args):
359 | args.insert(1, '--audit')
360 |
361 | if '--noconstant' in args:
362 | VWARGS.append('--noconstant')
363 |
364 | model = model_arg(args)
365 | if model:
366 | ModelName = model
367 | else:
368 |         # mkstemp creates the temp-file securely; vw overwrites it via '-f'
369 |         fd, ModelName = tempfile.mkstemp(suffix='.vwmodel')
370 |         os.close(fd)
369 | args.insert(1, ModelName)
370 | args.insert(1, '-f')
371 | CleanupModel = True
372 |
373 | # TODO: skip leading options that are intended for vw-varinfo itself
374 | for arg in args:
375 | vw_args.append(arg)
376 |
377 | d("vw_cmd is: %s" % vw_args)
378 | vw_exe = vw_args[0]
379 | if which(vw_exe) is None:
380 | fatal("Sorry: can't find %s (vowpal wabbit executable) in $PATH\n"
381 | "PATH=%s" % (vw_exe, os.environ["PATH"]))
382 |
383 | return vw_args
384 |
385 |
386 | def minmax(data):
387 |     """
388 |     Return a pair (min, max) of the values in data
389 |     """
390 |     # Derive both extremes from the data itself (a fixed 0 starting
391 |     # point would be wrong for all-positive or all-negative values)
392 |     data = list(data)
393 |     if not data:
394 |         return 0, 0
395 |
396 |     return min(data), max(data)
399 |
400 |
401 | def summarize():
402 | """Output summary of variables"""
403 | wmin, wmax = minmax(F_WEIGHT.values())
404 | w_absmax = max(abs(wmin), abs(wmax))
405 |
406 | # Print a header
407 |     print("%-16s\t%10s\t%s\t%s\t%s\t%s" %
408 |           ('FeatureName', 'HashVal', 'MinVal', 'MaxVal', 'Weight', 'RelScore'))
409 |
410 | # TODO: implement multi-class
411 | # multi-class needs per-class internal data-structs
412 |
413 | # To reverse-order add: 'reverse=True' arg to 'sorted'
414 | # itemgetter: (0): sort-by-key, (1): sort by value
415 | sorted_tuples = sorted(F_WEIGHT.items(), key=itemgetter(1))
416 | for fname, _ in sorted_tuples:
417 | fmin = float(F_MIN[fname])
418 | fmax = float(F_MAX[fname])
419 | fweight = float(F_WEIGHT[fname])
420 | fhash = F_HASH[fname]
421 | relscore = 100.0 * (fweight/w_absmax if w_absmax > 0 else 0.0)
422 |         print("%-16s\t%10s\t%.2f\t%.2f\t%.2f\t%7.2f" %
423 |               (fname, fhash, fmin, fmax, fweight, relscore))
424 |
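425 | # Why two passes? Pass 1 runs the user's original vw command, training a
426 | # model while we harvest every feature (name, hash, min, max) from the
427 | # audit stream. Pass 2 re-runs vw in test mode (-t) on one synthetic
428 | # example containing all of those features, so the audit output exposes
429 | # the final learned weight of each feature.
430 |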
425 | def pass1(vw_cmd):
426 | """In pass1 we run 'vw' as in the original command"""
427 | d("Starting PASS 1 ...")
428 | run_vw(vw_cmd, None)
429 |
430 | def pass2():
431 | """
432 | Run a 2nd pass with all features and stored model
433 | To get the final weights for all features
434 | """
435 | vw_cmd = [VW, '--quiet', '-t', '-a', '-i', ModelName]
436 | if len(VWARGS) > 0:
437 | vw_cmd += VWARGS
438 | all_features = all_features_example()
439 | d("Starting PASS 2 ...")
440 | run_vw(vw_cmd, all_features)
441 |
442 |
443 | #
444 | # -- main
445 | #
446 | def main():
447 | """Main func for vw-varinfo2: dataset feature information summary"""
448 |
449 | global MultiClass, MCLabels
450 |
451 | try:
452 | vw_cmd = get_vw_cmd(sys.argv)
453 |
454 | if is_multiclass(vw_cmd):
455 | # multi-class needs per-class internal data-structs
456 | MultiClass = True
457 | MCLabels = []
458 |
459 | # Run 1st pass to collect data on all features:
460 | pass1(vw_cmd)
461 |
462 | # Run second pass:
463 | # with -i ModelName and single example w/ all-features present
464 | pass2()
465 |
466 | summarize()
467 |
468 | if CleanupModel:
469 | d("removing tempfile: %s" % ModelName)
470 | os.remove(ModelName)
471 |
472 | except Exception as estr:
473 | # catch-all to cover all unhandled exceptions
474 | fatal("%s\n%s" % (estr, traceback.format_exc()))
475 |
476 | return 0
477 |
478 | if __name__ == "__main__":
479 | sys.exit(main())
480 |
481 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Discovering ketosis: _how to effectively lose weight_
2 | =====================================================
3 |
4 | ### _Here is a chart of my weight vs. time in the past 16 months or so:_
5 |
6 | ![my weight vs. time](weight.2015.png)
7 |
8 |
9 | The chart was generated from a data-set [`weight.2015.csv`](weight.2015.csv) by the script [`date-weight.r`](date-weight.r) in this git repository. It requires [`R`](http://r-project.org) and [`ggplot2`](http://ggplot2.org/).
10 |
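11 | For illustration only, here is a minimal, hypothetical Python sketch of the same idea. The actual chart comes from the R script; `pandas` and `matplotlib` are my stand-ins here, not part of this repository:
12 |
13 |     # Read the Date,Pounds CSV and plot weight vs. time
14 |     # (a rough Python stand-in for the R/ggplot2 chart)
15 |     import pandas as pd
16 |     import matplotlib.pyplot as plt
17 |
18 |     df = pd.read_csv('weight.2015.csv', parse_dates=['Date'])
19 |     plt.plot(df['Date'], df['Pounds'], marker='o')
20 |     plt.xlabel('Date')
21 |     plt.ylabel('Pounds')
22 |     plt.savefig('weight-vs-time.png')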
11 |
12 | In the following I'll describe the thought process, some other people's ideas, and the code I used to separate signal from noise. This separation was critical in leading me in the right direction.
13 |
14 | This github repository includes my code, [a Q&A section](QandA.md), and links
15 | for further reading.
16 |
17 |
18 | #### Disclaimers:
19 |
20 | What follows is what worked for me. Your situation may be different. Listen to your own body. The code here is designed to be used on your own data, not on mine.
21 |
22 | Also: this was *not* a scientific experiment, or a "study"; rather, it was a personal journey of experimentation and discovery.
23 |
24 | With these behind us, I'd like to channel [Galileo in the face of the inquisition](https://en.wikipedia.org/wiki/Galileo_affair): evolution has been hard at work for 2 billion years shaping the chemistry of all eukaryotes, multi-cellular life, and eventually mammals. The Krebs cycle, glucose metabolism, insulin spikes, glycogen in the liver, carnitine, lipase, are as real for you as they are for me. We may be very different in our genes and traits (some people are more insulin-resistant than others, for example), but we cannot be too different in our most fundamental metabolic chemistry: the chemistry that drives fat synthesis and breakdown.
25 |
26 |
27 | ## Salient facts & initial observations
28 |
29 | - I used to be a pretty thin person. My 1st DMV card, shown below, says 143 lb.
30 | - Unfortunately, since moving to the US, I've been gaining more and more weight. I peaked in 2015, over 50 lbs higher.
31 | - The US is a country where obesity is an epidemic.
32 | - Poorer demographics in the US have higher levels of obesity.
33 |
34 | ![my 1st DMV card](1992-ariel-dmv.png)
35 |
36 |
37 | Does a typical US lifestyle have anything to do with this epidemic? After reading on the subject, I could point at a few of the main suspects:
38 |
39 | - Fast food is highly available, and is very cheap compared to most alternatives
40 | - Most food we buy and eat is heavily processed -- watch [Food, Inc. (documentary)](http://www.takepart.com/foodinc/film)
41 | - "No Fat" and "Low Fat" labels are everywhere on supermarket shelves
42 | - Many foods are enriched and sweetened with high-fructose corn-syrup -- watch [Sugar Coated (documentary)](http://sugarcoateddoc.com/)
43 |
44 | As in many other instances, I realized I needed to think for myself: ignore all "expert" advice, question widely accepted ideas like the FDA "food pyramid", and start listening to my own body, and to the logic & data I could collect and trust myself.
45 |
46 | Once I did, the results followed.
47 |
48 | ## What didn't work
49 |
50 | In the past, I tried several times to change my diet. After reading one of Atkins' books, I realized, checked, and accepted the fact that excess carbs are a major factor in gaining weight. But that realization alone had not led to success.
51 |
52 | My will power, apparently, was insufficient. I had too much love of pizza and bread. I would reduce my carb consumption, lose a few pounds (typically ~5), and then break down, go back to consuming excess carbs, and gain all those pounds back, and then some. My longest diet stretch lasted just a few months.
53 |
54 | It was obvious that something was missing in my method. I just had to find it. I could increase my physical activity, say start training for a mini-marathon, but that's not something I felt comfortable with.
55 |
56 | I realized early on that I needed to adopt a lifestyle that not just reduces carbs or adds exercise, but is also sustainable and even enjoyable, so it can turn into a painless routine. Something that:
57 |
58 | > - I could do for years
59 | > - Would never make me feel the urge to break habits
60 | > - Is not hard, or unpleasant, for me to do
61 |
62 |
63 | ## Early insights & eureka moments
64 |
65 | Early in the process I figured I could use [machine learning](https://en.wikipedia.org/wiki/Machine_learning) to identify the factors that made me gain or lose weight. I used a simple method: every morning I would weigh myself, and record both the new weight and whatever I did in the past ~24 hours: not just the food I ate, but also whether I exercised, slept too little or too much, etc.
66 |
67 | The file I kept was fairly simple. A CSV with 3 columns:
68 |
69 | > *Date*, *MorningWeight*, *Yesterday's lifestyle/food/actions*
70 |
71 | The last column is an arbitrary-length list of *`word[:weight]`* items.
72 |
73 | The (optional) numerical weight following `:` expresses higher/lower quantities. The default weight, when missing, is 1:
74 |
75 | #
76 | # -- Comment lines (ignored)
77 | #
78 | Date,MorningWeight,YesterdayFactors
79 | 2012-06-10,185.0,
80 | 2012-06-11,182.6,salad sleep bacon cheese tea halfnhalf icecream
81 | 2012-06-12,181.0,sleep egg
82 | 2012-06-13,183.6,mottsfruitsnack:2 pizza:0.5 bread:0.5 date:3 dietsnapple splenda milk nosleep
83 | 2012-06-14,183.6,coffeecandy:2 egg mayo cheese:2 rice meat bread:0.5 peanut:0.4
84 | 2012-06-15,183.4,meat sugarlesscandy salad cherry:4 bread:0 dietsnapple:0.5 egg mayo oliveoil
85 | 2012-06-16,183.6,caprise bread grape:0.2 pasadena sugaryogurt dietsnapple:0.5 peanut:0.4 hotdog
86 | 2012-06-17,182.6,grape meat pistachio:5 peanut:5 cheese sorbet:5 orangejuice:2
87 | # and so on ...
88 |
89 |
90 | Then I wrote [a script](lifestyle-csv2vw) to convert this file to [vowpal-wabbit](https://github.com/JohnLangford/vowpal_wabbit/wiki) training-set regression format. In the converted train-set the label (target feature) is the change in weight (delta) in the past 24 hours, and the input features are what I did or ate in the ~24 hours leading to this delta -- a straight copy of the 3rd column.
91 |
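92 | For illustration, here is a tiny, hypothetical Python sketch of the conversion idea (the real converter is [`lifestyle-csv2vw`](lifestyle-csv2vw), a separate script; this sketch is not it):
93 |
94 |     # Turn Date,MorningWeight,YesterdayFactors rows into vw regression
95 |     # lines of the form: <delta> | <factor>[:weight] ...
96 |     import csv
97 |     import sys
98 |
99 |     prev = None
100 |     for row in csv.reader(sys.stdin):
101 |         if not row or row[0].startswith('#') or row[0] == 'Date':
102 |             continue                   # skip comments and the header
103 |         weight, factors = float(row[1]), row[2]
104 |         if prev is not None and factors:
105 |             delta = weight - prev      # label: last-24h weight change
106 |             print('%.2f | %s' % (delta, factors))
107 |         prev = weight
108 |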
92 | I was not dieting at that time. Just collecting data.
93 |
94 | The error-convergence of the machine-learning process looks like this (to smooth the data and amplify very weak signals, I partly sorted the lines in descending order of `abs(delta)`, and ran 4 passes over the data):
95 |
96 | ![vw error convergence](vw-convergence.png)
97 |
98 | You can reproduce my work by compiling your own data-file, installing all prerequisites, and running `make` in this directory. I wrote a [HOWTO file with more detailed instructions](HOWTO.md). Please open an issue if anything doesn't work for you.
99 |
100 | When you type `make` in this directory -- some magic happens.
101 |
102 | Here's what a typical result looks like.
103 |
104 | $ make
105 |
106 | ... (output trimmed for brevity) ...
107 |
108 | FeatureName HashVal ... Weight RelScore
109 | nosleep 143407 ... +0.6654 90.29%
110 | melon 234655 ... +0.4636 62.91%
111 | sugarlemonade 203375 ... +0.3975 53.94%
112 | trailmix 174671 ... +0.3362 45.63%
113 | bread 135055 ... +0.3345 45.40%
114 | caramelizedwalnut 148079 ... +0.3316 44.99%
115 | bun 1791 ... +0.3094 41.98%
116 |
117 | ... (trimmed for brevity. Caveat: data is too noisy anyway) ...
118 |
119 | stayhome 148879 ... -0.2690 -36.50%
120 | bacon 64431 ... -0.2998 -40.69%
121 | egg 197743 ... -0.3221 -43.70%
122 | parmesan 3119 ... -0.3385 -45.94%
123 | oliveoil 156831 ... -0.3754 -50.95%
124 | halfnhalf 171855 ... -0.4673 -63.41%
125 | sleep 127071 ... -0.7369 -100.00%
126 |
127 | The positive (top) relative-score values are lifestyle choices that make you ***gain weight***, while the negative ones (bottom) make you ***lose weight***.
128 |
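129 | The `RelScore` column is simply each learned weight scaled by the largest absolute weight, so the strongest factor gets +/-100%. A tiny Python sketch of that normalization, using two (rounded) weights copied from the table above:
130 |
131 |     # RelScore = 100 * weight / max(|weight|) over all features
132 |     weights = {'nosleep': 0.6654, 'sleep': -0.7369}
133 |     w_absmax = max(abs(w) for w in weights.values())
134 |     for name, w in weights.items():
135 |         print('%-10s %7.2f%%' % (name, 100.0 * w / w_absmax))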
129 |
130 | ##### And here's a variable-importance chart made from a similar data-set:
131 |
132 | ![relative scores of lifestyle factors](scores.png)
133 |
134 | Disclaimer: please don't read too much into the particulars of this data. Working with this particular data set was pretty challenging, since:
135 |
136 | - The number of original data-points (a bit over 100 days) may be too small to establish enough significance.
137 | - Typical daily changes in body weight are very small, often ~0.1 lb.
138 | - My scales are not accurate: you may note that my data has 0.2 pound resolution. This is not ideal. Getting scales with 0.1 pound resolution is highly recommended.
139 | - You may also note that the loss-convergence chart hits a hard floor at ~0.2 even when you do multiple-passes over the data (overfit the training-set) for a similar reason.
140 | - Items that make you lose and gain weight, often appear together on the same line so they cancel each other. This throws the automatic learning process off-course.
141 | - There were some misspellings in the original data (I hope I fixed all of these by now)
142 |
143 | So I focused mostly on the extremes (start and end) of the list as presented above, and just used the hints as general guidance for further study, experimentation, and action.
144 |
145 | Despite the noisy & insufficient data, and the inaccuracies in weighing, the machine-learning experiments made 4 facts pretty clear, pretty early:
146 |
147 | - Sleeping longer consistently appeared as *the* #1 factor in losing weight.
148 | - Lack of sleep did the opposite: too little sleep led to weight gains.
149 | - Carbs made me gain weight. The worst were high-starch and sugary foods.
150 | - Fatty and oily foods tended to do the opposite: they were positively correlated with weight-loss.
151 |
152 | The 'stayhome' lifestyle, which fell mostly on weekends, may have been a red herring: I slept longer when I didn't have to commute to work; OTOH, my diet on stay-home days may have been different.
153 |
154 | It took me a while to figure out the sleep part. *When we sleep we don't eat*. It is that simple.
155 |
156 | Moreover: we tend to binge and snack while not particularly hungry, but we never do it during sleep.
157 |
158 | Our sleeping time is our longest daily fasting time.
159 |
160 | Please note that my explanations of the effects may not in fact be accurate or deeply scientific.
161 | The goal of all this was incremental discovery: experiment, check effect, rinse, repeat.
162 |
163 | ## Further progress
164 |
165 | You may note that in the top (date vs. weight) chart there's a notable acceleration in the rate of weight-loss. The cause: deeper insights, and a better ability to sustain the diet as I understood the problem better.
166 |
167 | ***Extending the fasting time*** was one major accelerator of weight-loss rate. I did that by:
168 |
169 | > - Skipping breakfast, and
170 | > - Stopping eating earlier in the evening, before going to bed.
171 |
172 | This gave me 14-16 hours of fasting each day, rather than the more typical 10-12 hours.
173 |
174 | The 2nd accelerator was ***consuming fatty stuff*** (instead of carbs) in order to feel full.
175 |
176 | The 3rd accelerator was understanding the concepts of [Glycemic index](https://en.wikipedia.org/wiki/Glycemic_index) and [***Glycemic Load***](https://en.wikipedia.org/wiki/Glycemic_load), and shifting whatever I chose to eat towards ***lower Glycemic loads***.
177 |
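178 | Glycemic load is easy to compute: GL = (GI / 100) * grams of carbohydrate per serving. A small Python sketch with rough, illustrative numbers only (look up real GI and carb values for your own foods):
179 |
180 |     # glycemic load = (glycemic index / 100) * carb grams per serving
181 |     def glycemic_load(gi, carb_grams):
182 |         return gi / 100.0 * carb_grams
183 |
184 |     print(glycemic_load(45, 11))   # a whole orange:        ~5  (low)
185 |     print(glycemic_load(50, 26))   # a cup of orange juice: ~13 (high)
186 |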
178 | I now believe and hope that I can go all the way back to my original weight from when I first landed on US soil.
179 |
180 | If I can keep the present rate, it should take 1-2 years to completely reverse the damage of the past ~20 years.
181 |
182 | It is important to stress that I also *feel much better the more weight I lose*. As a welcome side-effect, the few borderline/high levels in my blood tests moved significantly towards normal averages during the period I lost weight.
183 |
184 | ### What were my data and my clear improvement in health saying?
185 |
186 | Looking at my data, and reading more, convinced me that I should beware of doctors [who push statins](https://www.google.com/search?q=the+truth+about+statins) instead of suggesting a better diet. I started doubting anyone who told me I needed to *reduce* fat. I now run away if anyone tells me "high cholesterol" in the diet is dangerous.
187 |
188 | Cholesterol, by the way, is an essential building block for many vital body products. The liver produces as much cholesterol as we need.
189 |
190 | Our body is an amazing machine. Billions of years of evolution have made it extremely *adaptive*.
191 |
192 | It is not our ***high fat consumption***, it is the ***fat-storage process*** that makes us accumulate fat in the tissues and become unhealthy.
193 |
194 | An enzyme called *lipase* breaks up fat. Raise the levels of lipase, and our body fat gets consumed faster. To get there, we need to give the body fat as an *alternative* to carbohydrates. When the body has depleted both the blood sugar and the glycogen (hydrated sugar) buffer in the liver, it has no other choice but to *adapt and compensate*. Our source of energy -- [ATP synthesis](https://en.wikipedia.org/wiki/Adenosine_triphosphate) -- switches from carbs to fats by producing more fat-breaking agents. The body is a "Flex Fuel" kind of machine that has simply replaced one fuel (carbs) with another (fat).
195 |
196 | When lipase, and all the other agents in the fat-to-ATP chemical path (aka [beta oxidation](https://en.wikipedia.org/wiki/Beta_oxidation)), mobilize and their levels are elevated, we burn more fat and lose weight over time.
197 |
198 | In a low-carb/high-fat (LCHF) regime, our night sleep (fasting time) becomes our friend. The fat-breaking agents keep working while we sleep, breaking-up the stored fat. This leads to weight-loss, and a healthier state.
199 |
200 | And when we push even further and cut carbs to *really* low levels, we may reach a new steady state, called ketosis, in which practically all our energy comes from fat. That's when we really win big in the weight-loss battle.
201 |
202 | The above is a very simplified, and hopefully easy to digest, version of what some diet books try to explain in hundreds of pages.
203 |
204 | ## My bottom-line recipe:
205 |
206 | - The hardest part (especially at the beginning) is reducing carbs. The worst are starch-rich foods (pizza, pasta, bread, etc.), then processed foods with high sugar content (sweet sodas, no-pulp juices, etc.). This doesn't mean ***no*** carbs. You may afford yourself carbs from time to time (say, a pizza once a week). As it turns out, an occasional lapse isn't enough to completely reverse a steady-state. However, you need to make sure you consume ***far fewer carbs***, ***less frequently***, than before. In particular, you must avoid binging on snacks like chips, pizza, doughnuts, pasta, and bread, or drinking sugar-rich drinks.
207 |
208 | - [Look up the Glycemic index](https://en.wikipedia.org/wiki/Glycemic_index) and [Glycemic Load](https://en.wikipedia.org/wiki/Glycemic_load) on Wikipedia. ***Avoid foods with a high glycemic load***. This prevents the blood-sugar spikes which lead to insulin spikes, and which tell the body's chemical cycles to revert from ketosis, or near-ketosis, back to fat-accumulation. Have a sweet tooth? Eat an orange instead of drinking orange juice. The two have vastly different glycemic loads, and this makes a huge difference. If you must add sweetness to your cup of tea or coffee, use a [Splenda (sucralose+dextrose) tablet](https://en.wikipedia.org/wiki/Splenda), or [a Stevia drop/tablet](https://en.wikipedia.org/wiki/Stevia), which typically weigh just ~0.1 gram, rather than a teaspoon of sugar (~4.2g, about 40x more). Result: similar sweetness, but a much lower glycemic load and resulting level of blood-glucose.
209 |
210 | - High fat: I switched from milk to half-and-half, and am considering heavy (and unsweetened) whipped cream. It has fewer carbs (lactose) and more fat; plus, it tastes better. Eat avocados, olive oil, mayo, coconut oil, nuts. I never worry about *natural* fat; I eat as much fat as I want. This is what makes it much easier to avoid carbs: when I stuff myself with fat, I feel much less hungry and miss the carbs less. The body is very good at figuring this out: "I have too much fat in the blood, so let's increase the amount of enzymes which break up fat", and this makes me lose weight in the long run. Most importantly, I always ***avoid any products labeled "low-fat" or "fat-free"***. The food industry usually replaces the fat with sugar so it tastes better -- otherwise it tastes awful. You'll often hear about "bad" vs "good" fat. My take: as long as it is natural, it is OK. The worst is trans fat: fat that the food industry artificially hydrogenates to increase shelf-life. The less saturated a fat is, the better. Mono-unsaturated (plant) liquid oils are the best, then come the poly-unsaturated fats, and finally the nearly (but not fully) saturated fats that come from animals. My buttery-spread spectrum is: *Margarine: no; Butter: ok; Earth Balance: no problem*. At any rate, even the most saturated fat gets broken up and depleted by the natural processes in the body.
211 |
212 | - A bit of exercise. Of course, more is better, but for many this may prove difficult. I don't exercise much. I just bike to work and back, about 20 min each way, meaning 40 min/day, 5 out of 7 days/week. You can try walking the dog (but walk faster), or Zumba-dancing to music. The trick is to find something that you don't find hard to do, or to find company to do it with. Then, do a little bit of it every day.
213 |
214 | - ***Longer fasting periods:*** This is the #1 contributor to weight-loss. Sleep longer, stop eating as early as possible before going to sleep, and start eating as late as possible after sleeping. *Skip breakfast*; after some time you won't feel hungry in the morning anymore. After long periods of fasting, the body chemistry adjusts. It needs ATP, but the level of glucose in the blood is too low, and the glycogen in the liver is fully consumed (this takes about 1-2 days of low or no carbs), so there's no other option but to start looking for other sources, like stored fat. This elevates the enzymes that help with breaking up fat, and the Krebs cycle reverses direction in the critical paths: instead of transforming excess carbs into stored fat, we break up stored fat for energy.
215 |
216 | - Eat eggs. They are a wonderful combo of fat and protein with no carbs at all. I read an interview with a [Japanese woman who reached 114 years](Longevity.md) and one of her secrets was to eat eggs daily. My favorite food is a scrambled egg with grilled onions (onions are a bit high on carbs, but too tasty to give up) and olives.
217 |
218 | - Eat slower, and chew longer... don't swallow just yet! Humans, just like dogs, tend to swallow too soon. Stop eating when you feel full. There's about a 20 min delay before your brain registers that you are full, so don't over-eat.
219 |
220 | ***
221 |
222 | ## Further reading:
223 |
224 | - [The Krebs (aka Citric acid) cycle](https://en.wikipedia.org/wiki/Citric_acid_cycle)
225 | - [Spikes of insulin and their effects](https://en.wikipedia.org/wiki/Sugar_crash) -- what the body does when it has an excess of sugar vs. an excess of fat.
226 | - [Glycemic Index](https://en.wikipedia.org/wiki/Glycemic_index)
227 | - [Glycemic Load](https://en.wikipedia.org/wiki/Glycemic_load) -- a better metric for weight-loss than Glycemic Index.
228 | - [Glycogen and its storage in the liver](https://en.wikipedia.org/wiki/Glycogen)
229 | - [Ketone bodies](https://en.wikipedia.org/wiki/Ketone_bodies)
230 | - [Ketosis -- not to be confused with keto-acidosis](https://en.wikipedia.org/wiki/Ketosis)
231 | - [Ketogenic diet](https://en.wikipedia.org/wiki/Ketogenic_diet)
232 |
233 |
234 |
237 |
238 | - [Why We Get Fat: And What to Do About It / Gary Taubes](http://www.amazon.com/gp/product/0307272702)
239 | - [Summary of Good Calories, Bad Calories / Gary Taub by Lower Thought](https://lowerthought.wordpress.com/complete-notes-to-good-calories-bad-calories/)
240 | - [The Obesity Code: Unlocking the Secrets of Weight Loss / Jason Fung](https://www.amazon.com/Obesity-Code-Unlocking-Secrets-Weight-ebook/dp/B01C6D0LCK/)
241 | - [The best summary about statins I've seen](http://www.newswithviews.com/Howenstine/james23.htm)
242 | - [High cholesterol doesn't cause heart disease](http://www.telegraph.co.uk/science/2016/06/12/high-cholesterol-does-not-cause-heart-disease-new-research-finds/)
243 | - [Dr. Mark Hyman take on a good diet (a bit different than mine)](http://drhyman.com/blog/2014/08/18/one-test-doctor-isnt-save-life/)
244 |
245 | #### Documentaries:
246 |
247 | - [Food, Inc. (2008)](https://www.netflix.com/title/70108783)
248 | - [Sugar Coated (2015)](https://www.netflix.com/title/80100595)
249 |
250 | #### More videos
251 |
252 | - [Reversing Type 2 diabetes starts with ignoring the guidelines | Sarah Hallberg | TEDxPurdueU](https://www.youtube.com/watch?v=da1vvigy5tQ)
253 |
254 | A nice 7:41-minute video of James McCarter at Quantified Self (an eye-opener for me):
255 |
256 | - [James McCarter: The Effects of a Year in Ketosis](https://vimeo.com/147795263)
257 |
258 | #### Questions, Answers, Comments
259 |
260 | [Some questions and comments I got and tried to answer](QandA.md)
261 |
262 |
268 |
269 | ## Acknowledgements
270 |
271 | Big thanks to the following people for contributing to this project in myriad ways:
272 | comments, references, corrections, etc.
273 |
274 | _Anat Faigon, Ingrid Kane, Hans Lee, Steve Malmskog, Eyal Friedman, Shiri Shoham, Gabi Harel, Shingi, Noa_
275 |
276 | _Update: 2016-08-12: this project made [Hacker News](https://news.ycombinator.com/item?id=12279415) and reached the top place for a while. Thanks for some great comments by benkuhn, aab0, zzleeper, and others, which helped me make it better._
277 | ![this project on the Hacker News front page](hackernews-2016-08-12.png)
278 |
279 | Special thanks to John Langford and the many other contributors to [vowpal wabbit](https://en.wikipedia.org/wiki/Vowpal_Wabbit).
280 |
281 |
282 | #### License:
283 |
284 | This code and additional material are released under a permissive and simple [2-clause BSD licence](Licence.md). The one-sentence summary: "as long as you don't sue me, and don't claim it as your own, you should be OK."
285 |
286 |
--------------------------------------------------------------------------------