6 |
7 |
8 |
32 |
33 |
34 | {{ content }}
35 |
36 |
--------------------------------------------------------------------------------
/_plugins/main_column_img.rb:
--------------------------------------------------------------------------------
1 | ## Liquid tag 'maincolumn' used to add image data that fits within the
2 | ## main column area of the layout
3 | ## Usage {% maincolumn 'path/to/image' 'This is the caption' %}
4 | #
5 | module Jekyll
6 | class RenderMainColumnTag < Liquid::Tag
7 |
8 | require "shellwords"
9 |
10 | def initialize(tag_name, text, tokens)
11 | super
12 | @text = text.shellsplit
13 | end
14 |
15 | def render(context)
16 | baseurl = context.registers[:site].config['baseurl']
17 | if @text[0].start_with?('http://', 'https://','//')
18 |       "<figure><img src='#{@text[0]}'/><figcaption>#{@text[1]}</figcaption></figure>" # reconstructed markup; remote URL used as-is
19 | else
20 |       "<figure><img src='#{baseurl}/#{@text[0]}'/><figcaption>#{@text[1]}</figcaption></figure>" # reconstructed markup; local path gets baseurl
21 | end
22 | end
23 | end
24 | end
25 |
26 | Liquid::Template.register_tag('maincolumn', Jekyll::RenderMainColumnTag)
27 |
--------------------------------------------------------------------------------
/_plugins/fullwidth.rb:
--------------------------------------------------------------------------------
1 | ## This has a fairly harmless hack that wraps the img tag in a div to prevent it from being
2 | ## wrapped in a paragraph tag instead, which would break the layout
3 | ## Usage {% fullwidth 'path/to/image' 'caption goes here in quotes' %}
4 | #
5 | module Jekyll
6 | class RenderFullWidthTag < Liquid::Tag
7 |
8 | require "shellwords"
9 |
10 | def initialize(tag_name, text, tokens)
11 | super
12 | @text = text.shellsplit
13 | end
14 |
15 | def render(context)
16 | baseurl = context.registers[:site].config['baseurl']
17 | if @text[0].start_with?('http://', 'https://','//')
18 |       "<div class='fullwidth'><img src='#{@text[0]}'/></div>"+ # reconstructed markup; remote URL used as-is
19 |       "<figcaption>#{@text[1]}</figcaption>"
20 | else
21 |       "<div class='fullwidth'><img src='#{baseurl}/#{@text[0]}'/></div>"+ # reconstructed markup; local path gets baseurl
22 |       "<figcaption>#{@text[1]}</figcaption>"
23 | end
24 | end
25 | end
26 | end
27 |
28 | Liquid::Template.register_tag('fullwidth', Jekyll::RenderFullWidthTag)
29 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # cs323-notes
2 |
3 | These notes are based on Stanford [CS323](http://cs.stanford.edu/~ermon/cs323/index.html), taught by [Stefano Ermon](http://cs.stanford.edu/~ermon/), and have been written by Michael Zhu.
4 |
5 | This course is a graduate-level introduction to automated reasoning techniques and their applications, covering logical and probabilistic approaches. Topics include: logical and probabilistic foundations, backtracking strategies and algorithms behind modern SAT solvers, stochastic local search and Markov Chain Monte Carlo algorithms, classes of reasoning tasks and reductions, and applications.
6 |
7 | The compiled version is available [here](http://ermongroup.github.io/cs323-notes/).
8 |
9 | ## Contributing
10 |
11 | The notes are still under construction! If you find any typos or can contribute to help make the notes better, please let us know, or submit a pull request with your fixes via GitHub.
12 |
13 | The notes are written in Markdown and are compiled into HTML using Jekyll. Please add your changes directly to the Markdown source code.
--------------------------------------------------------------------------------
/_sass/_settings.scss:
--------------------------------------------------------------------------------
1 | /* This file contains all the constants for colors and font styles */
2 |
3 | $body-font: et-book, Palatino, "Palatino Linotype", "Palatino LT STD", "Book Antiqua", Georgia, serif;
4 | // $body-font: ETBembo, Palatino, "Palatino Linotype", "Palatino LT STD", "Book Antiqua", Georgia, serif;
5 | // Note that Gill Sans is at the top of the stack and corresponds to what is used in Tufte's books.
6 | // However, it is not a free font, so if it is not present on the computer viewing the webpage,
7 | // the free Google 'Lato' font is used instead. It is similar.
8 | $sans-font: "Gill Sans", "Gill Sans MT", "Lato", Calibri, sans-serif;
9 | $code-font: Consolas, "Liberation Mono", Menlo, Courier, monospace;
10 | $url-font: "Lucida Console", "Lucida Sans Typewriter", Monaco, "Bitstream Vera Sans Mono", monospace;
11 | $text-color: #111;
12 | $bg-color: #fffff8;
13 | $contrast-color: #a00000;
14 | $border-color: #333333;
15 | $link-style: underline; // choices are 'color' or 'underline'. Default is color using $contrast-color set above
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2017 Volodymyr Kuleshov, Ermon Group
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/_plugins/margin_figure.rb:
--------------------------------------------------------------------------------
1 | ## Liquid tag 'marginfigure' used to add image data that fits within the
2 | ## margin area of the layout
3 | ## Usage {% marginfigure 'margin-id-whatever' 'path/to/image' 'This is the caption' %}
4 | #
5 | module Jekyll
6 | class RenderMarginFigureTag < Liquid::Tag
7 |
8 | require "shellwords"
9 |
10 | def initialize(tag_name, text, tokens)
11 | super
12 | @text = text.shellsplit
13 | end
14 |
15 | def render(context)
16 | baseurl = context.registers[:site].config['baseurl']
17 | if @text[1].start_with?('http://', 'https://', '//')
18 |       "<label for='#{@text[0]}' class='margin-toggle'>&#8853;</label>"+ # reconstructed markup; remote URL used as-is
19 |       "<input type='checkbox' id='#{@text[0]}' class='margin-toggle'/>"+
20 |       "<span class='marginnote'><img src='#{@text[1]}'/><br/> #{@text[2]}</span>"
21 | else
22 |       "<label for='#{@text[0]}' class='margin-toggle'>&#8853;</label>"+ # reconstructed markup; local path gets baseurl
23 |       "<input type='checkbox' id='#{@text[0]}' class='margin-toggle'/>"+
24 |       "<span class='marginnote'><img src='#{baseurl}/#{@text[1]}'/><br/> #{@text[2]}</span>"
25 | end
26 | end
27 | end
28 | end
29 |
30 | Liquid::Template.register_tag('marginfigure', Jekyll::RenderMarginFigureTag)
31 |
--------------------------------------------------------------------------------
/_includes/head.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |   <title>{% if page.title %}{{ page.title }}{% else %}{{ site.title }}{% endif %}</title>
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | {% if site.data.options.lato_font_load %}
15 |
16 | {% endif %}
17 |
18 | {% if site.data.options.mathjax %}
19 |
20 | {% endif %}
21 |
22 |
32 |
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: post
3 | title: Contents
4 | ---
5 |
6 | These notes are based on Stanford [CS323](http://cs.stanford.edu/~ermon/cs323/index.html), taught by [Stefano Ermon](http://cs.stanford.edu/~ermon/), and have been written by Michael Zhu. The notes are still under construction! If you find any typos or can contribute to help make the notes better, please let us know, or submit a pull request with your fixes to our [GitHub repository](https://github.com/ermongroup/cs323-notes).
7 |
8 | This course is a graduate-level introduction to automated reasoning techniques and their applications, covering logical and probabilistic approaches. Topics include: logical and probabilistic foundations, backtracking strategies and algorithms behind modern SAT solvers, stochastic local search and Markov Chain Monte Carlo algorithms, classes of reasoning tasks and reductions, and applications.
9 |
10 |
11 | ## Logical Reasoning
12 |
13 | - [Representation - propositional logic](logic/representation/): Definitions: syntax, semantics, knowledge base. Satisfiability: inference reduces to satisfiability, conjunctive normal form.
14 | - [Inference - SAT solvers](logic/inference/): Brute force, early stopping, unit resolution, DPLL algorithm, conflict-driven clause learning, engineering considerations, tutorial on practical SAT solvers.
15 | - [Tractable classes of SAT problems](logic/tractable/): Horn SAT and 2-SAT.
16 | - [Inference - random walk SAT solvers](logic/random_walk/): Introduction, review of Markov chains. Random walk algorithm for 2-SAT and analysis. Random walk algorithm for 3-SAT and analysis. Other variants.
17 | - [References](logic/references/)
18 |
19 | ## Probabilistic Reasoning
20 |
21 | - [Generalizing satisfiability problems](probabilistic/generalizing/): Minimum Vertex Cover (Weighted MAX-SAT). Markov logic networks. Factor graphs.
22 | - [Markov chains](probabilistic/markov_chains/): Markov Chain Monte Carlo (MCMC). Markov chains - introduction. Markov chains - proof of convergence.
23 | - [Metropolis-Hastings](probabilistic/mh/): Recap of Markov chains. Markov Chain Monte Carlo (MCMC). Metropolis-Hastings. Metropolis-Hastings algorithm.
24 | - [Gibbs sampling](probabilistic/gibbs/): Sampling and inference tasks. Gibbs sampling. Variants of Gibbs sampling. Simulated annealing.
--------------------------------------------------------------------------------
/_sass/_syntax-highlighting.scss:
--------------------------------------------------------------------------------
1 | /**
2 | * Syntax highlighting styles
3 | */
4 | $spacing-unit: 30px;
5 | %vertical-rhythm {
6 | margin-bottom: $spacing-unit / 2;
7 | }
8 |
9 | .highlight {
10 | background: #fffff8;
11 | @extend %vertical-rhythm;
12 |
13 | .c { color: #998; font-style: italic } // Comment
14 | .err { color: #a61717; background-color: #e3d2d2 } // Error
15 | .k { font-weight: bold } // Keyword
16 | .o { font-weight: bold } // Operator
17 | .cm { color: #998; font-style: italic } // Comment.Multiline
18 | .cp { color: #999; font-weight: bold } // Comment.Preproc
19 | .c1 { color: #998; font-style: italic } // Comment.Single
20 | .cs { color: #999; font-weight: bold; font-style: italic } // Comment.Special
21 | .gd { color: #000; background-color: #fdd } // Generic.Deleted
22 | .gd .x { color: #000; background-color: #faa } // Generic.Deleted.Specific
23 | .ge { font-style: italic } // Generic.Emph
24 | .gr { color: #a00 } // Generic.Error
25 | .gh { color: #999 } // Generic.Heading
26 | .gi { color: #000; background-color: #dfd } // Generic.Inserted
27 | .gi .x { color: #000; background-color: #afa } // Generic.Inserted.Specific
28 | .go { color: #888 } // Generic.Output
29 | .gp { color: #555 } // Generic.Prompt
30 | .gs { font-weight: bold } // Generic.Strong
31 | .gu { color: #aaa } // Generic.Subheading
32 | .gt { color: #a00 } // Generic.Traceback
33 | .kc { font-weight: bold } // Keyword.Constant
34 | .kd { font-weight: bold } // Keyword.Declaration
35 | .kp { font-weight: bold } // Keyword.Pseudo
36 | .kr { font-weight: bold } // Keyword.Reserved
37 | .kt { color: #458; font-weight: bold } // Keyword.Type
38 | .m { color: #099 } // Literal.Number
39 | .s { color: #d14 } // Literal.String
40 | .na { color: #008080 } // Name.Attribute
41 | .nb { color: #0086B3 } // Name.Builtin
42 | .nc { color: #458; font-weight: bold } // Name.Class
43 | .no { color: #008080 } // Name.Constant
44 | .ni { color: #800080 } // Name.Entity
45 | .ne { color: #900; font-weight: bold } // Name.Exception
46 | .nf { color: #900; font-weight: bold } // Name.Function
47 | .nn { color: #555 } // Name.Namespace
48 | .nt { color: #000080 } // Name.Tag
49 | .nv { color: #008080 } // Name.Variable
50 | .ow { font-weight: bold } // Operator.Word
51 | .w { color: #bbb } // Text.Whitespace
52 | .mf { color: #099 } // Literal.Number.Float
53 | .mh { color: #099 } // Literal.Number.Hex
54 | .mi { color: #099 } // Literal.Number.Integer
55 | .mo { color: #099 } // Literal.Number.Oct
56 | .sb { color: #d14 } // Literal.String.Backtick
57 | .sc { color: #d14 } // Literal.String.Char
58 | .sd { color: #d14 } // Literal.String.Doc
59 | .s2 { color: #d14 } // Literal.String.Double
60 | .se { color: #d14 } // Literal.String.Escape
61 | .sh { color: #d14 } // Literal.String.Heredoc
62 | .si { color: #d14 } // Literal.String.Interpol
63 | .sx { color: #d14 } // Literal.String.Other
64 | .sr { color: #009926 } // Literal.String.Regex
65 | .s1 { color: #d14 } // Literal.String.Single
66 | .ss { color: #990073 } // Literal.String.Symbol
67 | .bp { color: #999 } // Name.Builtin.Pseudo
68 | .vc { color: #008080 } // Name.Variable.Class
69 | .vg { color: #008080 } // Name.Variable.Global
70 | .vi { color: #008080 } // Name.Variable.Instance
71 | .il { color: #099 } // Literal.Number.Integer.Long
72 | }
73 |
--------------------------------------------------------------------------------
/logic/tractable/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: post
3 | title: Tractable classes of satisfiability problems
4 | ---
5 |
6 | We now discuss two special cases of satisfiability problems that can be
7 | solved in polynomial time, Horn SAT and 2-SAT. These problems define
8 | subclasses of general CNF formulas which satisfy some specific
9 | structure. By restricting the expressive power of the language, these
10 | subclasses of problems become easier to solve. The algorithms for
11 | solving these problems are essentially based on unit propagation.
12 |
13 | ## Horn SAT
14 |
15 | We begin with the relevant definitions for a Horn formula:
16 | - A literal is a positive literal if it is some variable. A literal is
17 | a negative literal if it is the negation of some variable.
18 | - A clause is positive if all of the literals are positive. A clause
19 | is negative if all of the literals are negative.
20 | - A clause is Horn if it has at most one literal that is positive. For
21 | example, the implication $$(p\land q)\Rightarrow z$$ is equivalent to
22 |   $$\neg p\lor\neg q\lor z$$ which is Horn.
23 | - A formula is Horn if it is a formula formed by the conjunction of
24 | Horn clauses. For example, the formula
25 | $${\left(\neg p\lor\neg q\lor z\right)}\land{\left(q\lor\neg z\right)}$$
26 | is Horn while
27 | $${\left(\neg p\lor q\lor z\right)}\land{\left(q\lor\neg z\right)}$$
28 | is not Horn.
29 |
30 | **Lemma**: Let $$S$$ be a set of unsatisfiable clauses. Then $$S$$ contains
31 | at least one positive clause and one negative clause.
32 |
33 | ***Proof***: Suppose the formula contains no positive clause. Then every
34 | clause contains at least one negative literal, so assigning false to all of
35 | the variables satisfies the formula. Symmetrically, if there is no negative
36 | clause, assigning true to all of the variables satisfies the formula.
37 |
38 | **Theorem**: Let $$S$$ be a set of Horn clauses. Let $$S'$$ be the set of
39 | clauses obtained by running unit propagation on $$S$$. Then $$S'$$ is
40 | satisfiable if and only if the empty clause does not belong to $$S'$$.
41 |
42 | ***Proof*** ($$\Rightarrow$$): If the empty clause belongs to $$S'$$, then
43 | $$S'$$ cannot be satisfiable because unit propagation is sound.
44 |
45 | ***Proof*** ($$\Leftarrow$$): If $$S$$ is Horn, then $$S'$$ is also Horn.
46 | Assume $$S'$$ is unsatisfiable. By the lemma above, $$S'$$ contains at least
47 | one positive clause and one negative clause. A clause that is both positive
48 | and Horn must either be a unit clause or an empty clause. We cannot
49 | derive a unit clause since we’ve run unit propagation until a fixed
50 | point, so there must exist an empty clause.
51 |
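The theorem gives a direct decision procedure. Below is a minimal Python sketch (illustrative, not part of the course materials); the clause encoding, with clauses as frozensets of signed integers, is an assumption made for the example.

```python
def horn_sat(clauses):
    """Decide satisfiability of a Horn formula via unit propagation.

    Each clause is a frozenset of nonzero ints: +v is the variable v,
    -v its negation. A Horn clause has at most one positive literal.
    """
    clauses = set(clauses)
    while True:
        if frozenset() in clauses:
            return False                  # empty clause derived: unsatisfiable
        unit = next((c for c in clauses if len(c) == 1), None)
        if unit is None:
            return True                   # fixed point with no empty clause
        (lit,) = unit
        # Assert lit: drop satisfied clauses, delete the falsified literal.
        clauses = {c - {-lit} for c in clauses if lit not in c}

# (p and q => z) and p and q and (not z) is unsatisfiable:
assert not horn_sat({frozenset({-1, -2, 3}), frozenset({1}),
                     frozenset({2}), frozenset({-3})})
```
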
52 | ## 2-SAT
53 |
54 | A formula is in k-CNF if it is in CNF and each clause has at most k
55 | literals. For example, the formula
56 | $${\left(p\lor q\right)}\land{\left(r\lor s\right)}$$ is in 2-CNF while
57 | $${\left(p\lor q\right)}\land{\left(r\lor s\lor t\right)}$$ is not in
58 | 2-CNF. The following algorithm can be used to determine the
59 | satisfiability of a 2-CNF formula in polynomial time.
60 | 1. \$${\Gamma}\leftarrow KB$$
61 | 2. while $${\Gamma}$$ is not empty do:
62 | 1. \$$L\leftarrow\text{pick a literal from }{\Gamma}$$
63 | 2. \$${\Delta}\leftarrow\text{UP}({\Gamma},L)$$
64 | 3. if $$\{\}\in{\Delta}$$ then
65 | 1. \$${\Delta}\leftarrow\text{UP}({\Gamma},\neg L)$$
66 | 2. if $$\{\}\in{\Delta}$$ then return unsatisfiable
67 | 4. \$${\Gamma}\leftarrow{\Delta}$$
68 | 3. Return satisfiable
69 |
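As an illustrative Python sketch of this algorithm (same clause encoding as in the Horn SAT sketch above), with UP($$\Gamma$$, $$L$$) implemented as conditioning on $$L$$ followed by unit propagation to a fixed point:

```python
def up(clauses, lit):
    """UP(Gamma, L): assert lit, then unit propagate to a fixed point."""
    clauses = set(clauses) | {frozenset({lit})}
    while True:
        if frozenset() in clauses:
            return clauses                 # empty clause derived
        unit = next((c for c in clauses if len(c) == 1), None)
        if unit is None:
            return clauses
        (l,) = unit
        clauses = {c - {-l} for c in clauses if l not in c}

def two_sat(kb):
    """Decide satisfiability of a 2-CNF formula with the algorithm above."""
    gamma = set(kb)
    while gamma:
        if frozenset() in gamma:
            return False
        lit = next(iter(next(iter(gamma))))   # pick a literal from Gamma
        delta = up(gamma, lit)
        if frozenset() in delta:
            delta = up(gamma, -lit)
            if frozenset() in delta:
                return False                  # both branches fail: unsatisfiable
        gamma = delta                         # proper subset of Gamma (lemma below)
    return True

assert two_sat({frozenset({1, 2}), frozenset({-1, 2}), frozenset({1, -2})})
```
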
70 | **Lemma**: If $${\Gamma}$$ is a 2-CNF formula in which the literal $$L$$
71 | occurs, then either:
72 | 1. UP$$(\Gamma,L)$$ contains the empty clause $$\{\}$$, so
73 | $${\Gamma}\models\neg L$$.
74 | 2. UP$$(\Gamma,L)$$ is a proper subset of $${\Gamma}$$.
75 |
76 | ***Proof***: For each clause, we consider one of the three cases.
77 | 1. If the clause contains $$L$$, then the clause is satisfied.
78 | 2. If the clause contains $$\neg L$$, then this clause becomes a unit
79 | clause and unit propagation is triggered.
80 | 3. Otherwise, the clause remains unchanged.
81 |
82 | ***Proof of correctness*** ($$\Rightarrow$$): If the algorithm returns
83 | satisfiable, the formula is satisfiable since the algorithm relies on
84 | unit propagation and branching.
85 |
86 | ***Proof of correctness*** ($$\Leftarrow$$): Consider the formula
87 | $${\Gamma}$$ at the beginning of the iteration in which we return
88 | unsatisfiable. Since $${\Gamma}$$ is a 2-CNF formula,
89 | $${\Gamma}\subseteq KB$$ (meaning the set of clauses in $${\Gamma}$$ is
90 | always a subset of the clauses in $$KB$$ ), so if $${\Gamma}$$ is
91 | unsatisfiable, then $$KB$$ is unsatisfiable. We know that both
92 | $${\Gamma}\land L$$ and $${\Gamma}\land\neg L$$ are unsatisfiable. This
93 | implies that $${\Gamma}$$ is unsatisfiable, so $$KB$$ is unsatisfiable.
94 |
--------------------------------------------------------------------------------
/_sass/_fonts.scss:
--------------------------------------------------------------------------------
1 | // Font imports file. If you don't want these fonts, comment out these and add your own into the fonts directory
2 | // and point the src attribute to the file.
3 | //
4 |
5 | @charset "UTF-8";
6 | //
7 | // @font-face {
8 | // font-family: ETBembo;
9 | // src: url("../fonts/et-bembo/et-bembo-roman-line-figures/et-bembo-roman-line-figures.eot");
10 | // src: url("../fonts/et-bembo/et-bembo-roman-line-figures/et-bembo-roman-line-figures.eot?#iefix") format("embedded-opentype"), url("../fonts/et-bembo/et-bembo-roman-line-figures/et-bembo-roman-line-figures.woff") format("woff"), url("../fonts/et-bembo/et-bembo-roman-line-figures/et-bembo-roman-line-figures.ttf") format("truetype"), url("../fonts/et-bembo/et-bembo-roman-line-figures/et-bembo-roman-line-figures.svg#etbemboromanosf") format("svg");
11 | // font-weight: normal;
12 | // font-style: normal
13 | // }
14 | //
15 | // @font-face {
16 | // font-family: ETBembo;
17 | // src: url("../fonts/et-bembo/et-bembo-display-italic-old-style-figures/et-bembo-display-italic-old-style-figures.eot");
18 | // src: url("../fonts/et-bembo/et-bembo-display-italic-old-style-figures/et-bembo-display-italic-old-style-figures.eot?#iefix") format("embedded-opentype"), url("../fonts/et-bembo/et-bembo-display-italic-old-style-figures/et-bembo-display-italic-old-style-figures.woff") format("woff"), url("../fonts/et-bembo/et-bembo-display-italic-old-style-figures/et-bembo-display-italic-old-style-figures.ttf") format("truetype"), url("../fonts/et-bembo/et-bembo-display-italic-old-style-figures/et-bembo-display-italic-old-style-figures.svg#etbemboromanosf") format("svg");
19 | // font-weight: normal;
20 | // font-style: italic
21 | // }
22 | //
23 | // @font-face {
24 | // font-family: ETBembo;
25 | // src: url("../fonts/et-bembo/et-bembo-bold-line-figures/et-bembo-bold-line-figures.eot");
26 | // src: url("../fonts/et-bembo/et-bembo-bold-line-figures/et-bembo-bold-line-figures.eot?#iefix") format("embedded-opentype"), url("../fonts/et-bembo/et-bembo-bold-line-figures/et-bembo-bold-line-figures.woff") format("woff"), url("../fonts/et-bembo/et-bembo-bold-line-figures/et-bembo-bold-line-figures.ttf") format("truetype"), url("../fonts/et-bembo/et-bembo-bold-line-figures/et-bembo-bold-line-figures.svg#etbemboromanosf") format("svg");
27 | // font-weight: bold;
28 | // font-style: normal
29 | // }
30 | //
31 | // @font-face {
32 | // font-family: ETBemboRomanOldStyle;
33 | // src: url("../fonts/et-bembo/et-bembo-roman-old-style-figures/et-bembo-roman-old-style-figures.eot");
34 | // src: url("../fonts/et-bembo/et-bembo-roman-old-style-figures/et-bembo-roman-old-style-figures.eot?#iefix") format("embedded-opentype"), url("../fonts/et-bembo/et-bembo-roman-old-style-figures/et-bembo-roman-old-style-figures.woff") format("woff"), url("../fonts/et-bembo/et-bembo-roman-old-style-figures/et-bembo-roman-old-style-figures.ttf") format("truetype"), url("../fonts/et-bembo/et-bembo-roman-old-style-figures/et-bembo-roman-old-style-figures.svg#etbemboromanosf") format("svg");
35 | // font-weight: normal;
36 | // font-style: normal;
37 | // }
38 |
39 |
40 | @font-face {
41 | font-family: "et-book";
42 | src: url("../fonts/et-book/et-book-roman-line-figures/et-book-roman-line-figures.eot");
43 | src: url("../fonts/et-book/et-book-roman-line-figures/et-book-roman-line-figures.eot?#iefix") format("embedded-opentype"), url("../fonts/et-book/et-book-roman-line-figures/et-book-roman-line-figures.woff") format("woff"), url("../fonts/et-book/et-book-roman-line-figures/et-book-roman-line-figures.ttf") format("truetype"), url("../fonts/et-book/et-book-roman-line-figures/et-book-roman-line-figures.svg#etbookromanosf") format("svg");
44 | font-weight: normal;
45 | font-style: normal
46 | }
47 |
48 | @font-face {
49 | font-family: "et-book";
50 | src: url("../fonts/et-book/et-book-display-italic-old-style-figures/et-book-display-italic-old-style-figures.eot");
51 | src: url("../fonts/et-book/et-book-display-italic-old-style-figures/et-book-display-italic-old-style-figures.eot?#iefix") format("embedded-opentype"), url("../fonts/et-book/et-book-display-italic-old-style-figures/et-book-display-italic-old-style-figures.woff") format("woff"), url("../fonts/et-book/et-book-display-italic-old-style-figures/et-book-display-italic-old-style-figures.ttf") format("truetype"), url("../fonts/et-book/et-book-display-italic-old-style-figures/et-book-display-italic-old-style-figures.svg#etbookromanosf") format("svg");
52 | font-weight: normal;
53 | font-style: italic
54 | }
55 |
56 | @font-face {
57 | font-family: "et-book";
58 | src: url("../fonts/et-book/et-book-bold-line-figures/et-book-bold-line-figures.eot");
59 | src: url("../fonts/et-book/et-book-bold-line-figures/et-book-bold-line-figures.eot?#iefix") format("embedded-opentype"), url("../fonts/et-book/et-book-bold-line-figures/et-book-bold-line-figures.woff") format("woff"), url("../fonts/et-book/et-book-bold-line-figures/et-book-bold-line-figures.ttf") format("truetype"), url("../fonts/et-book/et-book-bold-line-figures/et-book-bold-line-figures.svg#etbookromanosf") format("svg");
60 | font-weight: bold;
61 | font-style: normal
62 | }
63 |
64 | @font-face {
65 | font-family: "et-book-roman-old-style";
66 | src: url("../fonts/et-book/et-book-roman-old-style-figures/et-book-roman-old-style-figures.eot");
67 | src: url("../fonts/et-book/et-book-roman-old-style-figures/et-book-roman-old-style-figures.eot?#iefix") format("embedded-opentype"), url("../fonts/et-book/et-book-roman-old-style-figures/et-book-roman-old-style-figures.woff") format("woff"), url("../fonts/et-book/et-book-roman-old-style-figures/et-book-roman-old-style-figures.ttf") format("truetype"), url("../fonts/et-book/et-book-roman-old-style-figures/et-book-roman-old-style-figures.svg#etbookromanosf") format("svg");
68 | font-weight: normal;
69 | font-style: normal;
70 | }
71 |
--------------------------------------------------------------------------------
/probabilistic/gibbs/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: post
3 | title: Gibbs sampling
4 | ---
5 |
6 | # Sampling and inference tasks
7 |
8 | In sampling, we are concerned with how to sample from a target
9 | probability distribution $$p(x)=\frac{w(x)}{Z}$$. Given samples
10 | $$x_{1},...,x_{T}\sim p(x)$$, we can express a quantity of interest as the
11 | expected value of a random variable and then use the estimator
12 | $$\frac{1}{T}\sum_{t=1}^{T}\phi(x_{t})$$ to estimate
13 | $$\mathbb{E}_{p(x)}[\phi(x)]=\sum_{x}p(x)\phi(x)$$ . For example, to
14 | estimate the marginal probability $$P(x\left[1\right]=T)$$, we let
15 | $$\phi(x)={\mathbb{I}\left[{x\left[1\right]=T}\right]}$$. Thus, we see
16 | that we can use an MCMC algorithm to draw samples from $$p(x)$$ and then
17 | use the samples to estimate quantities of interest.
18 |
19 | # Gibbs sampling
20 |
21 | Last lecture, we described the Metropolis-Hastings algorithm for
22 | sampling from a probability distribution
23 | $$p(x)=\frac{1}{Z}w(x)=\frac{1}{Z}\prod_{c}\phi_{c}(x)$$ by using a
24 | proposal distribution and acceptance probabilities for accepting
25 | proposed moves. The MH algorithm only required computing the ratios
26 | $$p(x')/p(x)$$. Note that the MH algorithm treats $$p(x)$$ as a black box
27 | and does not leverage any particular structure of $$p(x)$$. Similarly, the
28 | proposal distribution we choose typically does not depend on $$p(x)$$ and
29 | also does not leverage any structure of $$p(x)$$.
30 |
31 | Gibbs sampling is an MCMC algorithm that repeatedly samples from the
32 | conditional distribution of one variable of the target distribution $$p$$,
33 | given all of the other variables. Gibbs sampling works as follows:
34 | 1. Initialize $$x^{(t)}={\left(x_{1}^{(t)},...,x_{k}^{(t)}\right)}$$ for
35 | $$t=0$$
36 | 2. For $$t=0,1,...$$
37 | 1. Pick index $$i$$ uniformly at random from $$1,...,k$$
38 | 2. Draw a sample $$a\sim p(x_{i}'\mid x_{-i}^{(t)})$$ where
39 | $$x_{-i}^{(t)}$$ is the set of all variables in $$x^{(t)}$$ except
40 | for the $$i^{th}$$ variable.
41 | 3. Let
42 | $$x^{(t+1)}=(x_{1}^{(t)},x_{2}^{(t)},...,x_{i-1}^{(t)},a,x_{i+1}^{(t)},...,x_{k}^{(t)})$$.
43 |
44 | Gibbs sampling assumes we can compute conditional distributions of one
45 | variable conditioned on all of the other variables and sample exactly
46 | from these distributions. In graphical models, the conditional
47 | distribution of some variable only depends on the variables in the
48 | Markov blanket of that node.
49 |
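As a minimal illustrative sketch (not from the notes), here is a random-scan Gibbs update for binary variables, assuming only that the unnormalized weight $$w(x)$$ can be evaluated; the exact conditional of one binary variable needs just a ratio of weights:

```python
import random

def gibbs_step(w, x):
    """One random-scan Gibbs update for a binary vector x.

    Picks an index i uniformly and resamples x[i] from the exact
    conditional p(x_i | x_{-i}), which only requires ratios of w.
    """
    i = random.randrange(len(x))
    x0, x1 = list(x), list(x)
    x0[i], x1[i] = 0, 1
    p1 = w(x1) / (w(x0) + w(x1))      # p(x_i = 1 | x_{-i})
    x[i] = 1 if random.random() < p1 else 0
    return x

# Example: two positively correlated bits, w(x) = 3 if x[0] == x[1] else 1.
x = [0, 0]
for _ in range(10000):
    x = gibbs_step(lambda z: 3.0 if z[0] == z[1] else 1.0, x)
```
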
50 | Let us now show that Gibbs sampling is a special case of
51 | Metropolis-Hastings where the proposed moves are always accepted (the
52 | acceptance probability is 1).
53 |
54 | Let $$x_{i}$$ denote the $$i^{th}$$ variable, and let $$x_{-i}$$ denote the
55 | set of all variables except $$x_{i}$$. Let
56 | $$Q(x_{i}',x_{-i}\mid x_{i},x_{-i})=\frac{1}{k}p(x_{i}'\mid x_{-i})$$. Let
57 | $$A(x_{i}',x_{-i}\mid x_{i},x_{-i})=\min(1,{\alpha})$$ where
58 |
59 | \$$
60 | \begin{eqnarray*}
61 | {\alpha}& = & \frac{p(x_{i}',x_{-i})Q(x_{i},x_{-i}\mid x_{i}',x_{-i})}{p(x_{i},x_{-i})Q(x_{i}',x_{-i}\mid x_{i},x_{-i})}\\
62 | & = & \frac{p(x_{i}',x_{-i})p(x_{i}\mid x_{-i})}{p(x_{i},x_{-i})p(x_{i}'\mid x_{-i})}\\
63 | & = & \frac{p(x_{i}'\mid x_{-i})p(x_{-i})p(x_{i}\mid x_{-i})}{p(x_{i}\mid x_{-i})p(x_{-i})p(x_{i}'\mid x_{-i})}\\
64 | & = & 1
65 | \end{eqnarray*}
66 | \$$
67 | {: style="text-align: center"}
68 |
69 | Gibbs sampling is used very often in practice since we don’t have to
70 | design a proposal distribution. Note that the Gibbs sampling algorithm
71 | described earlier is known as random scan Gibbs sampling because we
72 | choose an index uniformly at random in each iteration. A common
73 | implementation of Gibbs sampling is systematic scan Gibbs sampling where
74 | we have a for loop that goes through all of the variables $$x_{i}$$ in
75 | some order and samples $$x_{i}^{(t+1)}$$ from the conditional distribution
76 | of $$x_{i}$$ given all of the other variables.
77 |
78 | # Variants of Gibbs sampling
79 |
80 | One variant of Gibbs sampling is blocked Gibbs sampling, where we group
81 | two or more variables together in a block and update this block by
82 | sampling from the joint distribution of these variables conditioned on
83 | all of the other variables. Updating several variables jointly can mix
84 | faster than single-variable updates. In the limiting case, if we could
85 | sample from a block containing all of the variables, we would be sampling
86 | directly from $$p(x)$$.
87 |
88 | Another variant is collapsed Gibbs sampling. In this algorithm, we
89 | marginalize out as many variables as possible before sampling from the
90 | conditional distribution of some variable.
91 |
92 | As an example, suppose the target probability distribution we want to
93 | sample from is $$p(x,y\mid z)$$.
94 |
95 | In Gibbs sampling, we would alternately sample
96 | $$x^{(t+1)}\sim p(x\mid y^{(t)},z)$$ and
97 | $$y^{(t+1)}\sim p(y\mid x^{(t+1)},z)$$.
98 |
99 | In collapsed Gibbs sampling, we would alternately sample
100 | $$y^{(t+1)}\sim p(y\mid z)$$ and then
101 | $$x^{(t+1)}\sim p(x\mid y^{(t+1)},z)$$. Note that in this case, we are
102 | drawing samples from the exact distribution
103 | $$p(x,y\mid z)=p(x\mid y,z)p(y\mid z)$$.
104 |
105 | Note that the ordering of the variables in the sampling procedure is
106 | very important for collapsed Gibbs sampling (to ensure that the
107 | resulting Markov chain has the right stationary distribution) since the
108 | right ordering might depend on which variables we marginalize out.
109 |
110 | # Simulated annealing
111 |
112 | Simulated annealing is an adaptation of the Metropolis-Hastings
113 | algorithm and is a heuristic for finding the global maximum of a given
114 | function. Consider the case of SAT where we are given a CNF formula and
115 | want to find a satisfying assignment. Simulated annealing moves around
116 | the space trying to find assignments that satisfy as many clauses as
117 | possible.
118 |
119 | We construct a probability distribution that puts high probability on
120 | assignments that satisfy many clauses. Let
121 | $$p({\sigma})=\frac{\exp{\left(E({\sigma})/T\right)}}{Z}$$ where
122 | $$E({\sigma})$$ is the number of clauses satisfied by $${\sigma}$$ and $$T$$
123 | is the “temperature” parameter. As $$T\to\infty$$, $$p({\sigma})$$
124 | approaches the uniform distribution. As $$T\to0$$, $$p({\sigma})$$ tends to
125 | put all of the probability mass on satisfying assignments.
126 |
127 | To solve the optimization problem, we want to sample from this
128 | distribution when $$T$$ is small. We can use Metropolis-Hastings with a
129 | proposal distribution that randomly picks a variable and flips it. Let
130 | $$Q(x'\mid x)$$ be equal to $$1/n$$ if $$x,x'$$ differ in only one variable
131 | and $$0$$ otherwise. The Metropolis-Hastings acceptance probability is
132 | $$A(x'\mid x)=\min{\left(1,\frac{p(x')}{p(x)}\right)}=\min{\left(1,\exp{\left(\frac{E({\sigma}')-E({\sigma})}{T}\right)}\right)}$$.
133 | If $$E({\sigma}')\geq E({\sigma})$$, then we always accept the transition.
134 | $$T$$ controls how greedy we are.
135 |
136 | The algorithm starts with a large $$T$$ in order to move around the space
137 | freely and decreases $$T$$ slowly to concentrate probability mass on the
138 | states which satisfy the most clauses. $$T$$ is decreased based on some
139 | annealing schedule. Each iteration is otherwise an MH update.
140 |
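A minimal Python sketch of this procedure (illustrative; the clause encoding and the cooling schedule are assumptions, not from the notes):

```python
import math, random

def simulated_annealing_sat(clauses, n_vars, schedule, steps=100000):
    """Simulated annealing for CNF SAT, maximizing E = #satisfied clauses.

    clauses: lists of signed ints (+v / -v); schedule(t) returns T > 0.
    The proposal flips one uniformly chosen variable (the Q above).
    """
    def num_sat(a):
        return sum(any(a[abs(l)] == (l > 0) for l in c) for c in clauses)

    a = {v: random.random() < 0.5 for v in range(1, n_vars + 1)}
    e = num_sat(a)
    for t in range(steps):
        if e == len(clauses):
            return a                          # found a satisfying assignment
        v = random.randint(1, n_vars)         # propose flipping one variable
        a[v] = not a[v]
        e_new = num_sat(a)
        # Accept with probability min(1, exp((E' - E)/T)); otherwise undo.
        if e_new >= e or random.random() < math.exp((e_new - e) / schedule(t)):
            e = e_new
        else:
            a[v] = not a[v]
    return None                               # gave up; no certificate either way

# Example with geometric cooling from T = 2 toward 0:
# simulated_annealing_sat([[1, 2], [-1, 2], [1, -2]], 2, lambda t: 2.0 * 0.999 ** t)
```
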
--------------------------------------------------------------------------------
/probabilistic/mh/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: post
3 | title: 'Metropolis-Hastings'
4 | ---
5 |
6 | # Recap of Markov chains
7 |
8 | In the last lecture, we learned that if a Markov chain is irreducible
9 | and aperiodic, then the Markov chain will converge to its unique
10 | stationary distribution, regardless of the initial state.
11 | Mathematically, $$\lim_{n\to\infty}P^{n}=W$$, a matrix $$W$$ where every row
12 | is equal to some vector $$w$$.
13 |
14 | Recall that $$P$$ is a stochastic matrix. $$P$$ has a left eigenvector $$w$$
15 | with eigenvalue 1. $$P$$ has no other eigenvalues with modulus 1. Let
16 | $$\{v_{i}\}$$ be a basis of eigenvectors for $$P$$ where each $$v_{i}$$
17 | satisfies $$v_{i}P={\lambda}_{i}v_{i}$$. When we start with an initial
18 | $$\Pi^{0}=\sum_{i}{\alpha}_{i}v_{i}$$, then
19 | $$\Pi^{0}P=\sum_{i}{\alpha}_{i}v_{i}P=\sum_{i}{\alpha}_{i}{\lambda}_{i}v_{i}$$
20 | and
21 | $$\Pi^{0}P^{n}=\sum_{i}{\alpha}_{i}{\left({\lambda}_{i}\right)}^{n}v_{i}$$,
22 | and $${\left({\lambda}_{i}\right)}^{n}\to0$$ for the eigenvalues
23 | corresponding to all eigenvectors except $$w$$. The speed at which we have
24 | convergence is determined by the size of the second-largest eigenvalue.
25 |
26 | Our proof of the theorem last time was based on the fact that every time
27 | we apply $$P$$, we decrease the gap between the smallest and largest value
28 | of the state vector, and hence the state vector converges to a constant
29 | vector. Note that if $$d$$, the smallest entry of the transition matrix
30 | (as in the convergence proof), is larger, then the Markov chain converges
31 | more quickly: by the definition of $$d$$, there is then no pair of states
32 | with a very low probability of transitioning between them.
33 |
34 | # Markov Chain Monte Carlo (MCMC)
35 |
36 | Our goal in Markov Chain Monte Carlo (MCMC) is to sample from a
37 | probability distribution
38 | $$p(x)=\frac{1}{Z}w(x)=\frac{1}{Z}\prod_{c}\phi_{c}(x)$$. We want to
39 | construct a Markov chain that reaches the limiting distribution $$p(x)$$
40 | as fast as possible. The main question is how to design a transition
41 | matrix $$P$$ so that $$p(x)$$ is a left eigenvector of $$P$$ with
42 | eigenvalue 1.
43 |
44 | Note that in the high-dimensional case, we cannot even store all of the
45 | entries of $$w(x)$$. We will only assume we can evaluate
46 | $$w(x)=\prod_{c}\phi_{c}(x)$$ for any $$x$$. This also means that we do not
47 | need to calculate the normalization constant $$Z$$.
48 |
49 | The key assumption we will make is that the Markov chain is reversible.
50 | A Markov chain is reversible if there exists a distribution $$\Pi^{*}$$
51 | which satisfies the detailed balance conditions: $$\forall i,j$$,
52 | $$\Pi_{i}^{*}P_{ij}=\Pi_{j}^{*}P_{ji}$$.
53 |
54 | **Theorem**: If a distribution $$\Pi^{*}$$ is reversible, then $$\Pi^{*}$$
55 | is a stationary distribution.
56 |
57 | ***Proof***: For any state $$j$$, we have
58 |
59 | \$$
60 | \begin{eqnarray*}
61 | \sum_{i}\Pi_{i}^{*}P_{ij} & = & \sum_{i}\Pi_{j}^{*}P_{ji}\\
62 | \sum_{i}\Pi_{i}^{*}P_{ij} & = & \Pi_{j}^{*}
63 | \end{eqnarray*}
64 | \$$
65 | {: style="text-align: center"}
66 |
67 | Therefore, $$\Pi^{*}P=\Pi^{*}$$.
68 |
69 | Since we want the stationary distribution of the Markov chain to be
70 | $$p(x)$$, it suffices to design the transition matrix $$P$$ so the Markov
71 | chain satisfies detailed balance with respect to $$p(x)$$.
72 |
73 | # Metropolis-Hastings
74 |
75 | Metropolis-Hastings is an MCMC method for sampling from a probability
76 | distribution $$p(x)=\frac{1}{Z}w(x)=\frac{1}{Z}\prod_{c}\phi_{c}(x)$$ by
77 | using a proposal distribution for proposing moves and then accepting or
78 | rejecting proposed moves between states with some probability.
79 |
80 | First, let $$Q$$ be any proposal distribution where $$q(i,j)=Q(j\mid i)$$ is
81 | the probability of proposing a move to some state $$j$$ given the current
82 | state $$i$$. Then we will construct the transition matrix $$P$$ as
83 |
84 |
85 | \$$
86 | \begin{eqnarray*}
87 | P_{ij} & = & P(X_{n}=j\mid X_{n-1}=i)=\begin{cases}
88 | q(i,j){\alpha}(i,j) & \text{if }j\ne i\\
89 | q(i,i)+\sum_{k\ne i}q(i,k)(1-{\alpha}(i,k)) & \text{otherwise}
90 | \end{cases}\\
91 | \end{eqnarray*}
92 | \$$
93 | {: style="text-align: center"}
94 |
95 | where
96 | $${\alpha}(i,j)=\min\bigg\{1,\frac{p(j)q(j,i)}{p(i)q(i,j)}\bigg\}=\min\bigg\{1,\frac{w(j)q(j,i)}{w(i)q(i,j)}\bigg\}$$
97 | is the acceptance probability for accepting a proposed move from state
98 | $$i$$ to state $$j$$. Note that while we cannot evaluate $$p(x)$$ exactly, we
99 | can evaluate ratios $$p(j)/p(i)$$.
100 |
101 | We want to show that $$p$$ satisfies detailed balance for all $$i,j$$. By
102 | the definition of $${\alpha}$$, without loss of generality, assume that
103 | $${\alpha}(j,i)=1$$ and $${\alpha}(i,j)=\frac{w(j)q(j,i)}{w(i)q(i,j)}$$.
104 | Then
105 |
106 | \$$
107 | \begin{eqnarray*}
108 | w(i)q(i,j)\frac{w(j)q(j,i)}{w(i)q(i,j)} & = & w(j)q(j,i)\\
109 | w(i)q(i,j){\alpha}(i,j) & = & w(j)q(j,i){\alpha}(j,i)\\
110 | p(i)P_{ij} & = & p(j)P_{ji}
111 | \end{eqnarray*}
112 | \$$
113 | {: style="text-align: center"}
114 |
115 | Therefore, $$p$$ satisfies detailed balance and is a stationary
116 | distribution.
117 |
118 | Now we just need to ensure that the Markov chain is irreducible and
119 | aperiodic. This depends on our choice of the proposal distribution $$Q$$
120 | and target probability distribution $$p$$ since $$q(i,j){\alpha}(i,j)$$
121 | defines the probability of transitioning from $$i\to j$$.
122 |
123 | The intuition behind MH is that sampling from the Markov chain requires
124 | spending the right amount of time in the right states so that our
125 | samples are accurate draws. We need to balance the transitions between
126 | higher probability states and lower probability states under $$p$$ with
127 | the tendency of the proposal distribution to go to certain states. Given
128 | $$p$$ and $$Q$$, MH will set $${\alpha}$$ so $$p$$ satisfies detailed balance.
129 |
130 | # Metropolis-Hastings algorithm
131 |
132 | The procedure for the Metropolis-Hastings algorithm is as follows:
133 | 1. Initialize $$x^{(t)}$$ for $$t=0$$
134 | 2. Draw a sample $$x'$$ from $$Q(x'\mid x^{(t)})$$
135 | 3. Accept the move with probability
136 | $$A(x'\mid x^{(t)})=\min(1,{\alpha})$$ where
137 | $${\alpha}=\frac{p(x')Q(x^{(t)}\mid x')}{p(x^{(t)})Q(x'\mid x^{(t)})}$$.
138 | If accepted, let $$x^{(t+1)}=x'$$. Otherwise, let $$x^{(t+1)}=x^{(t)}$$.
139 | 4. Repeat steps 2-3 to draw samples
140 |
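A minimal Python sketch of these steps (illustrative, not from the notes), using a symmetric Gaussian random-walk proposal so that the $$Q$$ ratio cancels and only $$w(x')/w(x)$$ is needed:

```python
import math, random

def metropolis_hastings(w, x0, n_samples, step=1.0, burn_in=1000):
    """Random-walk Metropolis sampler for an unnormalized density w(x).

    The Gaussian proposal is symmetric, so Q(x | x') / Q(x' | x) = 1 and
    the acceptance probability reduces to min(1, w(x') / w(x)); the
    normalization constant Z is never needed.
    """
    x, samples = x0, []
    for t in range(burn_in + n_samples):
        x_prop = x + random.gauss(0.0, step)      # draw x' ~ Q(. | x)
        if random.random() < min(1.0, w(x_prop) / w(x)):
            x = x_prop                            # accept; otherwise keep x
        if t >= burn_in:
            samples.append(x)
    return samples

# Example: sample from an unnormalized standard normal, w(x) = exp(-x^2 / 2).
draws = metropolis_hastings(lambda x: math.exp(-x * x / 2), 0.0, 5000)
```
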
141 | In practice, we run a burn-in period of $$T$$ iterations and start
142 | collecting samples only after the burn-in period and hope that the
143 | Markov chain has converged by then. However, determining when the Markov
144 | chain has converged is a hard problem. One heuristic is to randomly
145 | initialize several Markov chains, plot some scalar function of the state
146 | of the Markov chain over time, and see if the scalar functions are
147 | similar. Note that this does not always guarantee convergence. For
148 | example, consider the case where we have a bimodal distribution or a
149 | singly peaked distribution.
150 |
151 | The samples drawn from the Markov chain are not i.i.d. In general,
152 | samples drawn close to each other can be highly correlated since
153 | Metropolis-Hastings moves tend to be local moves. However,
154 | asymptotically, the samples drawn from the Markov chain are all unbiased
155 | and all come from the right distribution. The variance will not decrease
156 | as fast as if we had independent samples though.
157 |
--------------------------------------------------------------------------------
/logic/representation/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: post
3 | title: 'Representation - propositional logic'
4 | ---
5 |
6 | We begin our course by studying propositional logic. After we introduce
7 | the definitions, we will discuss satisfiability in propositional logic
8 | and then move on to state of the art techniques to solve systems
9 | specified with propositional logic.
10 |
11 | # Definitions
12 |
13 | Propositional logic is a formal language used to specify knowledge in a
14 | mathematically rigorous way. We first define the syntax (grammar) and
15 | then the semantics (meaning) of sentences in propositional logic.
16 |
17 | ## Syntax
18 |
19 | We denote our propositional variables by $$p_{1},...,p_{n}$$ where each
20 | $$p_{i}$$ is a binary variable that can be true or false.
21 |
22 | Sentences are defined recursively as follows:
23 | - A propositional variable is a sentence (this is known as an
24 | atomic sentence)
25 | - If $$\alpha$$ and $$\beta$$ are sentences, then $$\neg\alpha$$,
26 | $${\left(\alpha\lor\beta\right)}$$, and
27 | $${\left(\alpha\land\beta\right)}$$ are valid sentences
28 |
29 | For example,
30 | $${\left({\left({\alpha}\lor\beta\right)}\land{\alpha}\right)}$$ is a
31 | valid sentence while $${\left({\alpha}\lor\beta\land{\alpha}\right)}$$
32 | isn’t a valid sentence.
33 |
34 | ## Semantics
35 |
36 | We have to define the notion of a world. A world $${\omega}$$ is an
37 | assignment of a truth value to each propositional variable. Note that
38 | there are $$2^{n}$$ possible worlds if we have $$n$$ propositional
39 | variables.
40 |
41 | We determine if a sentence $${\alpha}$$ is true in a world $${\omega}$$
42 | using the following recursive rules:
43 | - $${\omega}\models p_{i}$$ if and only if $${\omega}(p_{i})$$ is true
44 | - $${\omega}\models\neg{\alpha}$$ if and only if
45 | $${\omega}\not\models{\alpha}$$
46 | - $${\omega}\models{\left({\alpha}\lor\beta\right)}$$ if and only if
47 | ($${\omega}\models{\alpha}$$) OR ($${\omega}\models\beta$$)
48 | - $${\omega}\models{\left({\alpha}\land\beta\right)}$$ if and only if
49 | ($${\omega}\models{\alpha}$$) AND ($${\omega}\models\beta$$)
50 | - (Note that $${\alpha}\Rightarrow\beta$$ is syntactic sugar for
51 | $$\neg\alpha\lor\beta$$, and we can use the rules above)
52 |
53 | For example, $${\left({\alpha}\lor\neg{\alpha}\right)}$$ is true in any
54 | world while $${\left({\alpha}\land\neg{\alpha}\right)}$$ is never true in
55 | any world.
56 |
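These rules translate directly into a recursive evaluator. A small illustrative Python sketch (the tuple encoding of sentences is an assumption made for the example):

```python
# A sentence is ('var', name), ('not', s), ('or', s1, s2), or ('and', s1, s2).
def holds(world, s):
    """Return True iff world |= s, following the recursive rules above."""
    if s[0] == 'var':
        return world[s[1]]
    if s[0] == 'not':
        return not holds(world, s[1])
    if s[0] == 'or':
        return holds(world, s[1]) or holds(world, s[2])
    if s[0] == 'and':
        return holds(world, s[1]) and holds(world, s[2])
    raise ValueError(s[0])

# (alpha or not alpha) is true in every world:
s = ('or', ('var', 'p'), ('not', ('var', 'p')))
assert all(holds({'p': v}, s) for v in (True, False))
```
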
57 | ## Knowledge Base
58 |
59 | A knowledge base is a collection of sentences
60 | $${\alpha}_{1},{\alpha}_{2},...,{\alpha}_{k}$$ that we know are true, i.e.
61 | $${\alpha}_{1}\land{\alpha}_{2}\land...\land{\alpha}_{k}$$. The sentences
62 | in the knowledge base allow us to compactly specify the allowable
63 | states of the world. By adding new sentences, the number of possible
64 | worlds consistent with our knowledge base could decrease (if we gain new
65 | information), go to zero (if the new sentence is inconsistent with the
66 | existing knowledge base), or remain the same (if the new sentence is
67 | entailed by existing sentences).
68 |
69 | 
70 |
71 | *Figure: By adding $$\Delta$$ to our KB $$\Gamma$$, the number of possible
72 | worlds can (i) decrease, (ii) go to zero, or (iii) remain the same.*
73 |
74 | We will make use of the following definitions:
75 | - A sentence is satisfiable (consistent) if there exists at least one
76 |   world that makes the sentence true, i.e. $$Models({\alpha})\ne\emptyset$$.
77 | - A sentence is valid if the sentence is true in every possible
78 | world, i.e. $$Models({\alpha})={\Omega}=\{0,1\}^{n}$$.
79 | - Two sentences are equivalent if they are true in the same
80 | models, e.g. $${\alpha}\lor\beta$$ and $$\beta\lor\alpha$$.
81 |
82 | # Satisfiability
83 |
84 | ## Inference reduces to satisfiability
85 |
86 | The basic inference problem in the propositional logic system is the
87 | following: given the knowledge that we have about the world, is it the
88 | case that some property must always hold? In other words, what
89 | conclusion can we reach about some other sentence? The three possible
90 | cases that could result are always true, depends, and always false.
91 |
92 | Inference problem: Does $${\alpha}\Rightarrow\beta$$? We can solve this by
93 | checking if $${\alpha}\land\neg\beta$$ is satisfiable.
94 | 1. If not satisfiable, then we know that $${\alpha}\Rightarrow\beta$$.
95 | This is similar to the idea behind proof by contradiction.
96 | 2. If satisfiable, then there are two possible cases. Check if
97 | $${\alpha}\land\beta$$ is satisfiable. If $${\alpha}\land\beta$$ is
98 | satisfiable, then there is no definitive answer. If
99 | $${\alpha}\land\beta$$ is not satisfiable, then we know that
100 | $${\alpha}\Rightarrow\neg\beta$$.
101 |
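Concretely, given any satisfiability oracle (the `sat` parameter below is hypothetical) and the sentence encoding from the evaluator sketch above, the reduction looks like this (illustrative):

```python
def infer(sat, alpha, beta):
    """Reduce the inference question to satisfiability checks, as above."""
    if not sat(('and', alpha, ('not', beta))):
        return 'alpha implies beta'
    if not sat(('and', alpha, beta)):
        return 'alpha implies not beta'
    return 'no definitive answer'
```
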
102 | Many problems across different domains can be encoded using
103 | propositional logic. Inference in this space basically boils down to
104 | checking satisfiability. For example, problems such as solving Sudoku
105 | puzzles, hardware verification, and planning can be encoded as
106 | satisfiability problems and then solved efficiently using state of the
107 | art satisfiability solvers. Since a lot of research has gone into
108 | improving general SAT solvers, if some domain-specific problem has an
109 | efficient transformation into an instance of satisfiability checking,
110 | then SAT solvers can often solve these domain-specific problems
111 | efficiently.
112 |
113 | ## Conjunctive normal form
114 |
115 | To check the satisfiability of a sentence, the first step is to convert
116 | sentences into a normal form which is easier for satisfiability solvers to
117 | handle. A sentence is in conjunctive normal form (also known as clausal
118 | normal form or CNF) if it is a conjunction of disjunctions.
119 |
120 | Given any formula, we can always convert it into CNF using the following
121 | procedure:
122 | 1. The first step is to remove syntactic sugar, e.g. change
123 | $${\alpha}\Rightarrow\beta$$ into $$\neg{\alpha}\lor\beta$$. After this
124 | step, we should only have negations, conjunctions, and disjunctions.
125 | 2. Push negations in. Negations should only appear in front of
126 | propositional variables. For example, change $$\neg\neg{\alpha}$$ into
127 | $${\alpha}$$ and $$\neg({\alpha}\lor\beta)$$ into
128 | $${\left(\neg{\alpha}\land\neg\beta\right)}$$.
129 | 3. Distribute disjunctions over conjunctions, e.g. change
130 | $${\alpha}\lor(\beta\land\gamma)$$ into
131 | $$({\alpha}\lor\beta)\land({\alpha}\lor\gamma)$$. (Note that
132 | converting a sentence which is composed of many disjunctions of
133 | conjunctions into CNF using this procedure can create an exponential
134 | number of clauses.)
135 |
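A compact illustrative Python sketch of steps 2 and 3 (assuming step 1 has already rewritten $$\Rightarrow$$, and reusing the tuple encoding from the evaluator sketch above):

```python
def nnf(s):
    """Step 2: push negations inward (negation normal form)."""
    if s[0] == 'not':
        t = s[1]
        if t[0] == 'var':
            return s
        if t[0] == 'not':
            return nnf(t[1])                        # double negation
        flip = 'and' if t[0] == 'or' else 'or'      # De Morgan
        return (flip, nnf(('not', t[1])), nnf(('not', t[2])))
    if s[0] == 'var':
        return s
    return (s[0], nnf(s[1]), nnf(s[2]))

def dist(a, b):
    """Distribute a disjunction over conjunctions."""
    if a[0] == 'and':
        return ('and', dist(a[1], b), dist(a[2], b))
    if b[0] == 'and':
        return ('and', dist(a, b[1]), dist(a, b[2]))
    return ('or', a, b)

def cnf(s):
    """Step 3: CNF of a sentence (can be exponentially larger)."""
    s = nnf(s)
    if s[0] == 'and':
        return ('and', cnf(s[1]), cnf(s[2]))
    if s[0] == 'or':
        return dist(cnf(s[1]), cnf(s[2]))
    return s

# alpha or (beta and gamma)  ->  (alpha or beta) and (alpha or gamma)
assert cnf(('or', ('var', 'a'), ('and', ('var', 'b'), ('var', 'c')))) == \
    ('and', ('or', ('var', 'a'), ('var', 'b')), ('or', ('var', 'a'), ('var', 'c')))
```
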
136 | Checking if an assignment satisfies a CNF formula is easy. A clause is
137 | satisfiable if and only if at least one of the literals (a variable or
138 | the negation of a variable) is set to true. A sentence is satisfiable if
139 | and only if each clause is satisfiable.
140 |
141 | Suppose we want to test the satisfiability of a formula $$\Delta$$ in CNF.
142 | Consider what happens when we set a propositional variable $$P$$ to true.
143 | If $$P$$ belongs to a given clause, then that clause becomes satisfied and
144 | we can remove it from the formula. If $$\neg P$$ belongs to a clause, that
145 | literal is now false and can be deleted from the clause. Any other clause
146 | is unaffected. The resulting formula can be expressed as
147 | $$\Delta\vert P=\{\alpha\backslash\{\neg P\}\mid\alpha\in\Delta,P\notin\alpha\}$$.
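
This definition can be implemented in one line. An illustrative Python sketch, with clauses as frozensets of signed integers (the encoding is an assumption made for the example):

```python
def condition(delta, p):
    """Compute Delta | P: set the variable p (a positive int) to true.

    Clauses containing the literal +p are satisfied and dropped; the
    falsified literal -p is deleted from the remaining clauses.
    """
    return {clause - {-p} for clause in delta if p not in clause}

# (P or Q) and (not P or R), conditioned on P = true, leaves just (R):
assert condition({frozenset({1, 2}), frozenset({-1, 3})}, 1) == {frozenset({3})}
```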
148 |
--------------------------------------------------------------------------------
/probabilistic/generalizing/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: post
3 | title: Generalizing satisfiability problems
4 | ---
5 |
6 | In propositional logic, we used hard constraints to specify our system.
7 | Today, we will consider several generalizations of propositional logic
8 | and the Boolean satisfiability problem that allow us to formulate more
9 | complex problems. When considering the problems that can be encoded in a
10 | given framework, we will see that we have the following spectrum of
11 | representational power, from least to greatest: SAT $$\to$$ Weighted
12 | MAX-SAT $$\to$$ Markov logic networks $$\to$$ Factor graphs.
14 |
15 | # Minimum Vertex Cover (Weighted MAX-SAT)
16 |
17 | Given an undirected graph $$G=(V,E)$$, we want to find a subset of
18 | vertices $$U\subseteq V$$ such that for each $$(i,j)\in E$$, either $$i\in U$$
19 | or $$j\in U$$ (every edge is incident to at least one vertex in $$U$$). The
20 | minimum vertex cover problem is finding a smallest vertex cover. A
21 | trivial vertex cover would be the set of all vertices. A real-life
22 | example is a graph where the edges represent roads and finding a vertex
23 | cover could represent installing a small number of cameras at the
24 | intersections which cover all of the roads.
25 |
26 | Consider formulating this problem the following way. We introduce one
27 | Boolean variable for each vertex, such that $$X_{i}=1$$ iff $$i\in U$$. Then
28 | for each $$(i,j)\in E$$, we introduce the clause
29 | $${\left(X_{i}\lor X_{j}\right)}$$. We want to minimize $$\sum_{i}X_{i}$$.
30 | The hard requirement is finding a valid vertex cover while the soft
31 | requirement is minimizing the size of the vertex cover.
32 |
33 | An extension of SAT is Weighted MAX-SAT where we add weights to each
34 | clause. For any assignment $${\sigma}\in\{0,1\}^{n}$$, define
35 | $$score({\sigma})$$ to be the sum of the weights of the clauses that
36 | $${\sigma}$$ satisfies and $$cost({\sigma})$$ to be the sum of the weights
37 | of the clauses that $${\sigma}$$ does not satisfy. Then the optimization
38 | problem is finding the assignment $${\sigma}$$ with maximum score or
39 | minimum cost.
40 |
41 | To formulate the minimum vertex cover problem as a Weighted MAX-SAT
42 | problem, we find the assignment $${\sigma}$$ that minimizes the cost of
43 | the following optimization problem: for each $$(i,j)\in E$$,
44 | $${\left(X_{i}\lor X_{j}\right)}$$ with weight $$\infty$$ and for each
45 | $$i\in V$$, $$\neg X_{i}$$ with weight 1.
46 |
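A small illustrative Python sketch of the cost function and this encoding (the clause representation is an assumption made for the example):

```python
def cost(weighted_clauses, sigma):
    """Weighted MAX-SAT cost: total weight of the clauses sigma violates.

    weighted_clauses: (literals, weight) pairs with literals as signed
    ints; sigma maps each variable to True/False.
    """
    return sum(w for lits, w in weighted_clauses
               if not any(sigma[abs(l)] == (l > 0) for l in lits))

# Minimum vertex cover on the single edge (1, 2): the edge clause
# (X1 or X2) has infinite weight, and each (not Xi) has weight 1.
clauses = [([1, 2], float('inf')), ([-1], 1), ([-2], 1)]
assert cost(clauses, {1: True, 2: False}) == 1    # the cover {1} costs 1
```
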
47 | To solve Weighted MAX-SAT, we can adapt solvers for SAT. Local search
48 | algorithms and greedy heuristics can use the cost or score function when
49 | deciding between possible moves. During systematic search, when
50 | exploring the search tree, solvers can use branch and bound techniques
51 | when deciding which subtrees to prune. This involves keeping track of
52 | lower bounds or upper bounds based on the best solution found so far and
53 | guesses for bounds in the current subtree for either the cost or score
54 | function. To get the bounds for the subtrees, one technique is relaxing
55 | the domain of an integer optimization problem from $$x\in\{0,1\}$$ to
56 | $$x\in[0,1]$$ and solving the easier optimization problem.
57 |
58 | # Markov logic networks
59 |
60 | Propositional logic fails when some statements in the knowledge base are
61 | inconsistent. Instead of having logical statements which are either true
62 | or false in a world, we want to generalize to statements which hold with
63 | some weight (probability). Markov logic networks are such a
64 | generalization, combining logic and probability.
65 |
66 | The idea is to encode our problem using soft constraints among the
67 | variables in our model. By specifying weights on the soft constraints,
68 | we will then have a compact way to represent the most likely worlds
69 | consistent with our knowledge.
70 |
71 | For any configuration (state), defined by an assignment of values to the
72 | variables in our model, we can compute the weight of that configuration.
73 | Formally, we define a cost function, mapping each configuration
74 | $$x\in\{0,1\}^{n}\to\text{cost}(x)$$, and we let the weight of a
75 | configuration be equal to $$w(x)=\exp{\left(-\text{cost}(x)\right)}$$.
76 |
77 | Define
78 | $$p(x)=\frac{w(x)}{Z}=\frac{1}{Z}\exp{\left(-\text{cost}(x)\right)}=\frac{1}{Z}\exp{\left(-\sum_{\text{clauses }c}w_{c}1_{c}(x)\right)}$$
79 | where $$1_{c}(x)$$ is an indicator function which is 1 if clause $$c$$ is
80 | violated. By defining the normalization constant
81 | $$Z=\sum_{x\in\{0,1\}^{n}}\exp{\left(-\text{cost}(x)\right)}$$, we ensure
82 | we have valid probabilities satisfying $$0\leq p(x)\leq1$$ and
83 | $$\sum_{x\in\{0,1\}^{n}}p(x)=1$$.
84 |
85 | We see that the probability of an assignment factorizes over the
86 | clauses:
87 |
88 | \$$
89 | \begin{eqnarray*}
90 | p(x) & = & \frac{1}{Z}\exp{\left(-\sum_{c}w_{c}1_{c}(x)\right)}\\
91 | & = & \frac{1}{Z}\prod_{c}\phi_{c}(x_{c})
92 | \end{eqnarray*}
93 | \$$
94 | {: style="text-align: center"}
95 |
96 | for an appropriately defined $$\phi_{c}(x_{c})$$, one factor for each
97 | clause $$c$$.
98 |
99 | In Markov logic networks, a basic optimization problem is finding the
100 | configuration $$X$$ maximizing $$P(X\mid E)$$, i.e. $${\mathrm{argmax}}_{X}P(X\mid E)$$, for some
101 | evidence $$E$$. Note that
102 | $${\mathrm{argmax}}_{x}\log p(x\mid y)={\mathrm{argmax}}_{x}\big[-\log(Z)-\text{cost}(x,y)\big]$$
103 | is equivalent to $${\mathrm{argmin}}_{x}\text{cost}(x,y)$$.
104 |
105 | # Factor graphs
106 |
107 | An arbitrary probability distribution over $$n$$ binary variables requires
108 | $$2^{n}-1$$ parameters to represent. Factor graphs allow us to study
109 | probability distributions which factorize and can be represented in a
110 | compact way.
111 |
112 | The basic setup for factor graphs is for each $$x\in\{0,1\}^{n}$$, let
113 | $$p(x)=\frac{1}{Z}\prod_{c}\phi_{c}(x_{c})$$ over all cliques $$c$$ where
114 | each $$\phi_{c}(x_{c})$$ is a function which maps
115 | $$\{0,1\}^{|c|}\to{\mathbb{R}}^{+}$$ .
116 |
117 | We can encode satisfiability problems into the factor graph
118 | representation as follows. Given a CNF formula, we can represent each
119 | clause by a clique which is a function of the variables in that clause.
120 | If the clause is satisfied by some partial assignment $$x_{c}$$, we let
121 | $$\phi_{c}(x_{c})=1$$, and otherwise we let $$\phi_{c}(x_{c})=0$$. Then an
122 | assignment $$x$$ that satisfies all of the clauses will have
123 | $$\prod_{c}\phi_{c}(x_{c})=1$$ and otherwise will have
124 | $$\prod_{c}\phi_{c}(x_{c})=0$$.
125 |
126 | In factor graphs, some probabilistic reasoning tasks that we might want
127 | to perform are as follows:
128 | 1. Marginal and conditional probabilities:
129 | $$p(x_{A}=\overline{x_{A}}\mid x_{B}=\overline{x_{B}})$$
130 | 2. Sampling: $$x\sim p(x)$$
131 | 3. Compute the partition function $$Z=\sum_{x}\prod_{c}\phi_{c}(x)$$ (the
132 | number of satisfying assignments in SAT problems)
133 | 4. Most likely state of the world given evidence: $$\max_{x}p(x\mid y)$$
134 |
135 | **Theorem**: Problems 1 through 3 are reducible to one another.
136 |
137 | ***Proof*** ($$3\Rightarrow1$$): If we can compute partition functions,
138 | then we can compute
139 | $$P(x_{1}=T)=\frac{\sum_{x\in SAT}{\mathbb{I}}[x_{1}=T]}{Z}$$ by first
140 | computing the partition function $$Z$$ of the original graphical model,
141 | clamping the variable $$x_{1}=T$$, and then counting the number of
142 | satisfying assignments with $$x_{1}=T$$. The marginal probability is the
143 | ratio of the total weight in the subtree rooted at $$x_{1}=T$$ and the
144 | total weight of the tree.
145 |
146 | ***Proof*** ($$1\Rightarrow3$$): If we can compute all marginal
147 | probabilities, then we can compute the partition function $$Z$$ as
148 | follows. Rewrite
149 | $$Z=\frac{Z}{Z(x_{1}=T)}\frac{Z(x_{1}=T)}{Z(x_{1}=T,x_{2}=T)}...Z(x_{1}=T,...,x_{n}=F)$$
150 | where we choose the assignments $$x_{1},...,x_{n}$$ so that the partition
151 | function counts are non-zero and the ratios above are inverse ratios of
152 | marginal probabilities.
153 |
154 | ***Proof*** ($$2\Rightarrow1$$): If we can draw samples
155 | $$X_{1},...,X_{T}\sim p(x)$$, then we can compute the marginal probability
156 | $$P(x_{1}=\text{True})$$ as the empirical frequencies
157 | $$\hat{g}(X_{1},...,X_{T})=\frac{1}{T}\sum_{t=1}^{T}{\mathbb{I}}[X_{t}[1]=\text{True}]$$
158 | for $$T$$ sufficiently large.
159 |

***Proof*** ($$1\Rightarrow2$$): If we can evaluate marginal
probabilities, then we can sample. Start at the root of the tree. Say
this is variable $$x_{1}$$. We compute the marginal probability that
$$x_{1}$$ is true or false. We draw a random number
$$r\sim\text{Uniform}(0,1)$$ and go down the branch $$x_{1}=T$$ if
$$r<P(x_{1}=T)$$ and the branch $$x_{1}=F$$ otherwise. We then repeat
this process, sampling each remaining variable from its marginal
conditioned on the values drawn so far.
--------------------------------------------------------------------------------
/probabilistic/markov_chains/index.md:
--------------------------------------------------------------------------------
73 | In what follows, we will assume that the Markov chain is irreducible
74 | (for all states $$i,j$$, $$[P^{n}]_{ij}>0$$ for some $$n$$, meaning you
75 | can go from any state to any other state for a large enough $$n$$) and
76 | the Markov chain is aperiodic (there exists $$n$$ such that for all
77 | $$n'\geq n$$, $$P(x_{n'}=i\mid x_{0}=i)>0$$).
78 |
79 | $$\Pi^{*}$$ is a stationary probability distribution if
80 | $$\Pi^{*}=\Pi^{*}P$$. In general, stationary probability distributions are
81 | not unique. The irreducibility condition guarantees that the stationary
82 | distribution is unique. For an example of what happens when we do not
83 | have irreducibility, consider the case of two nodes with self-loops with
84 | $$P=\begin{bmatrix}1 & 0\\
85 | 0 & 1
86 | \end{bmatrix}$$.
87 |
88 | $$\Pi^{*}$$ is the limiting distribution if for every initial probability
89 | distribution $$\Pi^{0}$$, $$\lim_{n\to\infty}\Pi^{n}=\Pi^{*}$$. The
90 | aperiodicity condition is necessary for the limit to exist. (Comment:
91 | This is a technical condition. In practice, we might not need
92 | aperiodicity.) For an example of what happens when we do not have
93 | aperiodicity, consider $$P=\begin{bmatrix}0 & 1\\
94 | 1 & 0
95 | \end{bmatrix}$$.
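
Both failure modes are easy to verify numerically (a small sketch with numpy):

```python
import numpy as np

I2 = np.eye(2)                            # reducible chain: two disconnected self-loops
flip = np.array([[0., 1.], [1., 0.]])     # irreducible but periodic chain

pi = np.array([0.3, 0.7])
print(pi @ I2)                                   # [0.3 0.7]: every pi is stationary
print(pi @ np.linalg.matrix_power(flip, 50))     # [0.3 0.7]
print(pi @ np.linalg.matrix_power(flip, 51))     # [0.7 0.3]: oscillates, no limit
```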
96 |
97 | # Markov chains - proof of convergence
98 |
99 | We will prove that if the Markov chain is irreducible and aperiodic,
100 | then there exists a stationary distribution, the stationary distribution
101 | is unique, and the Markov chain will converge to the stationary
102 | distribution (cf. the Perron-Frobenius theorem).
103 |
104 | If the Markov chain is irreducible and aperiodic, then the Markov chain
105 | is primitive ($$\exists k$$ such that $$[P^{k}]_{ij}>0$$ for all $$i,j$$).
106 | $$[P^{k}]_{ij}$$ is the probability of going from state $$i$$ to state $$j$$
107 | in exactly $$k$$ steps. To see this, note that if the Markov chain is
108 | irreducible, we can go from any node to any other node in a finite
109 | number of steps (possibly depending on $$i,j$$). We want a single $$k$$
110 | independent of $$i,j$$; aperiodicity guarantees returns to each state at
111 | all sufficiently large times, which lets us pad shorter paths to a common length $$k$$.
112 |
113 | Without loss of generality (otherwise replace $$P$$ with the primitive power $$P^{k}$$), let us consider the case where all entries satisfy $$[P]_{ij}>0$$.
114 | We will prove that $$P^{n}\to W$$ as $$n\to\infty$$ for a $$W$$ where all of
115 | the rows of $$W$$ are identical.
116 |
117 | The intuition is that the stochastic matrix is doing averaging. The
118 | biggest and smallest elements of the vector get closer to each other
119 | because of the weighted average (contraction). Eventually, the vector
120 | will converge to a constant.
121 |
122 | Let $$P$$ be an $$r\times r$$ transition probability matrix with no zero
123 | entries. Let $$d>0$$ be the smallest entry of $$P$$.
124 |
125 | Let $$y\in{\mathbb{R}}^{r}$$ be a vector with largest component $$M_{0}$$
126 | and smallest component $$m_{0}$$. Similarly, define $$M_{1}$$ to be the
127 | largest component and $$m_{1}$$ to be the smallest component of
128 | $$Py\in{\mathbb{R}}^{r}$$.
129 |
130 | We will show that $$(M_{1}-m_{1})\leq(1-2d)(M_{0}-m_{0})$$. To prove this
131 | bound, let us consider a “worst-case” vector which will give us the
132 | tightest bound. Consider a vector $$y$$ which is $$M_{0}$$ everywhere except
133 | $$m_{0}$$ at the spot corresponding to the entry with $$d$$ in $$P$$. Thus, we
134 | see that the biggest $$M_{1}$$ can be is $$M_{1}\leq dm_{0}+(1-d)M_{0}$$.
135 | Similarly, the smallest $$m_{1}$$ can be is $$m_{1}\geq dM_{0}+(1-d)m_{0}$$.
136 | Subtracting the two inequalities gives the desired bound.
137 |
138 | Let $$y\in{\mathbb{R}}^{r}$$ be arbitrary. Each component of $$Py$$ is a
139 | convex combination of the components of $$y$$, so for the sequence $$P^{n}y$$ we have
140 |
141 | \$$
142 | \begin{eqnarray*}
143 | & M_{0}\geq M_{1}\geq M_{2}\geq...\\
144 | & m_{0}\leq m_{1}\leq m_{2}\leq...\\
145 | & m_{0}\leq m_{n}\leq M_{n}\leq M_{0}
146 | \end{eqnarray*}
147 | \$$
148 | {: style="text-align: center"}
149 |
150 | Since the sequences are bounded and monotonic, they converge:
151 | $$M_{n}\to M$$ and $$m_{n}\to m$$.
152 |
153 | We also know that
154 |
155 | \$$
156 | \begin{eqnarray*}
157 | M_{n}-m_{n} & \leq & (1-2d)(M_{n-1}-m_{n-1})\\
158 | & \leq & (1-2d)^{n}(M_{0}-m_{0})
159 | \end{eqnarray*}
160 | \$$
161 | {: style="text-align: center"}
162 |
163 | If $$r\geq2$$, then $$d\leq\frac{1}{2}$$ and $$0\leq1-2d<1$$, so as
164 | $$n\to\infty$$, $$M_{n}-m_{n}\to0$$ and so we have $$M=m$$. Therefore,
165 | $$\lim_{n\to\infty}P^{n}y=\mu$$, a vector $$\mu$$ where all of the entries
166 | in $$\mu$$ are the same.
167 |
168 | In particular, we can let $$y=e_{j}$$. Then $$P^{n}e_{j}$$, which is the
169 | $$j^{th}$$ column of $$P^{n}$$, converges to a vector $$w_{j}$$ whose
170 | entries are all the same. This concludes the proof that $$P^{n}\to W$$ as
171 | $$n\to\infty$$ for a $$W$$ where all of the rows of $$W$$ are identical.
172 | The interpretation of this limit is that regardless of the initial state
173 | we start in, the limiting probability distribution is the same. If
174 | $$\Pi^{0}$$ is any probability distribution, then
175 | $$\lim_{n\to\infty}\Pi^{0}P^{n}=\Pi^{0}W=w$$, where $$w$$ is any row of $$W$$ (since the entries of $$\Pi^{0}$$ sum to 1).
176 |
177 | We will now show that $$w$$ is a stationary distribution of $$P$$. Let
178 | $$W=\lim_{n\to\infty}P^{n}$$. Since $$P^{n+1}=P^{n}P$$, by taking limits on
179 | both sides, we get $$W=WP$$. Row-wise, $$w=wP$$. We also see that $$w$$ is a
180 | left eigenvector of $$P$$ with an eigenvalue of 1.
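
As a numerical check of the whole argument (the chain below is an assumed toy example with all entries positive):

```python
import numpy as np

P = np.array([[0.9, 0.1],
              [0.4, 0.6]])

W = np.linalg.matrix_power(P, 100)   # P^n for large n
print(W)                             # both rows approach w = [0.8, 0.2]

w = W[0]
print(np.allclose(w, w @ P))         # True: w = wP, a stationary distribution
```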
181 |
--------------------------------------------------------------------------------
/fonts/icomoon.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/css/tufte.orginal.css:
--------------------------------------------------------------------------------
1 | ---
2 | # this ensures Jekyll reads the file to be transformed into CSS later
3 | # only Main files contain this front matter, not partials.
4 | nav_exclude: true
5 | ---
6 | /*****************************************************************************
7 | /*
8 | /* Tufte Jekyll blog theme
9 | /* Based on Tufte CSS by Dave Liepmann ( https://github.com/edwardtufte/tufte-css )
10 | /*
11 | /* The README.md will show you how to set up your site along with other goodies
12 | /*****************************************************************************/
13 |
14 | // Imports to create final
15 |
16 | @import "../_sass/fonts";
17 | @import "../_sass/settings";
18 | @import "../_sass/syntax-highlighting";
19 |
20 | /* Tufte CSS styles */
21 | html { font-size: 15px; }
22 |
23 | body { width: 87.5%;
24 | margin-left: auto;
25 | margin-right: auto;
26 | padding-left: 12.5%;
27 | font-family: et-book, Palatino, "Palatino Linotype", "Palatino LT STD", "Book Antiqua", Georgia, serif;
28 | background-color: #fffff8;
29 | color: #111;
30 | max-width: 1400px;
31 | counter-reset: sidenote-counter; }
32 |
33 | h1 { font-weight: 400;
34 | margin-top: 4rem;
35 | margin-bottom: 1.5rem;
36 | font-size: 3.2rem;
37 | line-height: 1; }
38 |
39 | h2 { font-style: italic;
40 | font-weight: 400;
41 | margin-top: 2.1rem;
42 | margin-bottom: 0;
43 | font-size: 2.2rem;
44 | line-height: 1; }
45 |
46 | h3 { font-style: italic;
47 | font-weight: 400;
48 | font-size: 1.7rem;
49 | margin-top: 2rem;
50 | margin-bottom: 0;
51 | line-height: 1; }
52 |
53 | p.subtitle { font-style: italic;
54 | margin-top: 1rem;
55 | margin-bottom: 1rem;
56 | font-size: 1.8rem;
57 | display: block;
58 | line-height: 1; }
59 |
60 | .numeral { font-family: et-book-roman-old-style; }
61 |
62 | .danger { color: red; }
63 |
64 | article { position: relative;
65 | padding: 5rem 0rem; }
66 |
67 | section { padding-top: 1rem;
68 | padding-bottom: 1rem; }
69 |
70 | p, ol, ul { font-size: 1.4rem; }
71 |
72 | p { line-height: 2rem;
73 | margin-top: 1.4rem;
74 | margin-bottom: 1.4rem;
75 | padding-right: 0;
76 | vertical-align: baseline; }
77 |
78 | /* Chapter Epigraphs */
79 | div.epigraph { margin: 5em 0; }
80 |
81 | div.epigraph > blockquote { margin-top: 3em;
82 | margin-bottom: 3em; }
83 |
84 | div.epigraph > blockquote, div.epigraph > blockquote > p { font-style: italic; }
85 |
86 | div.epigraph > blockquote > footer { font-style: normal; }
87 |
88 | div.epigraph > blockquote > footer > cite { font-style: italic; }
89 |
90 | /* end chapter epigraphs styles */
91 |
92 | blockquote { font-size: 1.4rem; }
93 |
94 | blockquote p { width: 50%; }
95 |
96 | blockquote footer { width: 50%;
97 | font-size: 1.1rem;
98 | text-align: right; }
99 |
100 | ol, ul { width: 45%;
101 | -webkit-padding-start: 5%;
102 | -webkit-padding-end: 5%; }
103 |
104 | li { padding: 0.5rem 0; }
105 |
106 | figure { padding: 0;
107 | border: 0;
108 | font-size: 100%;
109 | font: inherit;
110 | vertical-align: baseline;
111 | max-width: 55%;
112 | -webkit-margin-start: 0;
113 | -webkit-margin-end: 0;
114 | margin: 0 0 3em 0; }
115 |
116 | figcaption { float: right;
117 | clear: right;
118 | margin-right: -48%;
119 | margin-top: 0;
120 | margin-bottom: 0;
121 | font-size: 1.1rem;
122 | line-height: 1.6;
123 | vertical-align: baseline;
124 | position: relative;
125 | max-width: 40%; }
126 |
127 | figure.fullwidth figcaption { margin-right: 24%; }
128 |
129 | /* Links: replicate underline that clears descenders */
130 | a:link, a:visited { color: inherit; }
131 |
132 | a:link { text-decoration: none;
133 | background: -webkit-linear-gradient(#fffff8, #fffff8), -webkit-linear-gradient(#fffff8, #fffff8), -webkit-linear-gradient(#333, #333);
134 | background: linear-gradient(#fffff8, #fffff8), linear-gradient(#fffff8, #fffff8), linear-gradient(#333, #333);
135 | -webkit-background-size: 0.05em 1px, 0.05em 1px, 1px 1px;
136 | -moz-background-size: 0.05em 1px, 0.05em 1px, 1px 1px;
137 | background-size: 0.05em 1px, 0.05em 1px, 1px 1px;
138 | background-repeat: no-repeat, no-repeat, repeat-x;
139 | text-shadow: 0.03em 0 #fffff8, -0.03em 0 #fffff8, 0 0.03em #fffff8, 0 -0.03em #fffff8, 0.06em 0 #fffff8, -0.06em 0 #fffff8, 0.09em 0 #fffff8, -0.09em 0 #fffff8, 0.12em 0 #fffff8, -0.12em 0 #fffff8, 0.15em 0 #fffff8, -0.15em 0 #fffff8;
140 | background-position: 0% 93%, 100% 93%, 0% 93%; }
141 |
142 | @media screen and (-webkit-min-device-pixel-ratio: 0) { a:link { background-position-y: 87%, 87%, 87%; } }
143 |
144 | a:link::selection { text-shadow: 0.03em 0 #b4d5fe, -0.03em 0 #b4d5fe, 0 0.03em #b4d5fe, 0 -0.03em #b4d5fe, 0.06em 0 #b4d5fe, -0.06em 0 #b4d5fe, 0.09em 0 #b4d5fe, -0.09em 0 #b4d5fe, 0.12em 0 #b4d5fe, -0.12em 0 #b4d5fe, 0.15em 0 #b4d5fe, -0.15em 0 #b4d5fe;
145 | background: #b4d5fe; }
146 |
147 | a:link::-moz-selection { text-shadow: 0.03em 0 #b4d5fe, -0.03em 0 #b4d5fe, 0 0.03em #b4d5fe, 0 -0.03em #b4d5fe, 0.06em 0 #b4d5fe, -0.06em 0 #b4d5fe, 0.09em 0 #b4d5fe, -0.09em 0 #b4d5fe, 0.12em 0 #b4d5fe, -0.12em 0 #b4d5fe, 0.15em 0 #b4d5fe, -0.15em 0 #b4d5fe;
148 | background: #b4d5fe; }
149 |
150 | /* Sidenotes, margin notes, figures, captions */
151 | img { max-width: 100%; }
152 |
153 | .sidenote, .marginnote { float: right;
154 | clear: right;
155 | margin-right: -60%;
156 | width: 50%;
157 | margin-top: 0;
158 | margin-bottom: 0;
159 | font-size: 1.1rem;
160 | line-height: 1.3;
161 | vertical-align: baseline;
162 | position: relative; }
163 |
164 | .table-caption { float:right;
165 | clear:right;
166 | margin-right: -60%;
167 | width: 50%;
168 | margin-top: 0;
169 | margin-bottom: 0;
170 | font-size: 1.0rem;
171 | line-height: 1.6; }
172 |
173 | .sidenote-number { counter-increment: sidenote-counter; }
174 |
175 | .sidenote-number:after, .sidenote:before { content: counter(sidenote-counter) " ";
176 | font-family: et-book-roman-old-style;
177 | position: relative;
178 | vertical-align: baseline; }
179 |
180 | .sidenote-number:after { content: counter(sidenote-counter);
181 | font-size: 1rem;
182 | top: -0.5rem;
183 | left: 0.1rem; }
184 |
185 | .sidenote:before { content: counter(sidenote-counter) " ";
186 | top: -0.5rem; }
187 |
188 | p, footer, table, div.table-wrapper-small, div.supertable-wrapper > p, div.booktabs-wrapper { width: 55%; }
189 |
190 | div.fullwidth, table.fullwidth { width: 100%; }
191 |
192 | div.table-wrapper { overflow-x: auto;
193 | font-family: "Trebuchet MS", "Gill Sans", "Gill Sans MT", sans-serif; }
194 |
195 | @media screen and (max-width: 760px) { p, footer { width: 90%; }
196 | pre.code { width: 87.5%; }
197 | ul { width: 85%; }
198 | figure { max-width: 90%; }
199 | figcaption, figure.fullwidth figcaption { margin-right: 0%;
200 | max-width: none; }
201 | blockquote p, blockquote footer { width: 90%; }}
202 |
203 | .sans { font-family: "Gill Sans", "Gill Sans MT", Calibri, sans-serif;
204 | letter-spacing: .03em; }
205 |
206 | .code { font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
207 | font-size: 1.125rem;
208 | line-height: 1.6; }
209 |
210 | h1 .code, h2 .code, h3 .code { font-size: 0.80em; }
211 |
212 | .marginnote .code, .sidenote .code { font-size: 1rem; }
213 |
214 | pre.code { width: 52.5%;
215 | padding-left: 2.5%;
216 | overflow-x: auto; }
217 |
218 | .fullwidth { max-width: 90%;
219 | clear:both; }
220 |
221 | span.newthought { font-variant: small-caps;
222 | font-size: 1.2em; }
223 |
224 | input.margin-toggle { display: none; }
225 |
226 | label.sidenote-number { display: inline; }
227 |
228 | label.margin-toggle:not(.sidenote-number) { display: none; }
229 |
230 | @media (max-width: 760px) { label.margin-toggle:not(.sidenote-number) { display: inline; }
231 | .sidenote, .marginnote { display: none; }
232 | .margin-toggle:checked + .sidenote,
233 | .margin-toggle:checked + .marginnote { display: block;
234 | float: left;
235 | left: 1rem;
236 | clear: both;
237 | width: 95%;
238 | margin: 1rem 2.5%;
239 | vertical-align: baseline;
240 | position: relative; }
241 | label { cursor: pointer; }
242 | pre.code { width: 90%;
243 | padding: 0; }
244 | .table-caption { display: block;
245 | float: right;
246 | clear: both;
247 | width: 98%;
248 | margin-top: 1rem;
249 | margin-bottom: 0.5rem;
250 | margin-left: 1%;
251 | margin-right: 1%;
252 | vertical-align: baseline;
253 | position: relative; }
254 | div.table-wrapper, table, table.booktabs { width: 85%; }
255 | div.table-wrapper { border-right: 1px solid #efefef; }
256 | img { width: 100%; } }
257 |
--------------------------------------------------------------------------------
/logic/inference/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: post
3 | title: 'Inference - satisfiability solvers'
4 | ---
5 |
6 | ## Brute force
7 |
8 | 
9 |
10 | *Figure from Chapter 3, page 1 of draft chapter on satisfiability by
11 | Adnan Darwiche*
12 |
13 | The brute force approach to checking satisfiability is to go through all
14 | possible worlds and check if the formula is satisfied. For example, a
15 | brute force approach for testing the satisfiability of some formula with
16 | three variables A, B, and C would visit the possible worlds
17 | $$w_{1},w_{2},...,w_{8}$$ in some order and check if the formula is
18 | satisfied in any of the eight worlds. To prove unsatisfiability, we
19 | would need to try all possible worlds. Checking satisfiability is
20 | NP-complete, and since the number of possible worlds is exponential in
21 | the number of variables, brute force search takes exponential time in
22 | the worst case (as does every known algorithm). Nonetheless, in
23 | practice, one can usually solve many kinds of real-world problems
24 | tractably using the techniques discussed below.
25 |
26 | ## Early stopping
27 |
28 | One improvement over brute force search is to do early stopping. We can
29 | visualize the search process as traversing a binary tree where each node
30 | corresponds to a variable and the left branch corresponds to setting
31 | that variable to true and the right branch corresponds to setting that
32 | variable to false. After making assignments to a couple of variables, we
33 | might be able to declare early success if we find a satisfying
34 | assignment. In addition, whenever we encounter a formula that is already
35 | unsatisfiable, we backtrack and avoid the need to search the subtree
36 | rooted at that node.
37 |
38 | Note that this algorithm requires us to choose a variable ordering. Each
39 | time we go down a branch and make a variable assignment, we can simplify
40 | the CNF. At every step, we check if there exists an empty clause which
41 | means that the formula cannot be satisfied. If we eliminate all of the
42 | clauses, then the formula is satisfiable.
43 |
44 | 
45 |
46 | *Figure from Chapter 3, page 2 of draft chapter on satisfiability by
47 | Adnan Darwiche*
48 |
49 | Going back to our earlier example with three variables A, B, and C, we
50 | traverse the tree starting from the root and check if the formula is
51 | satisfied or unsatisfiable each time we make an assignment to a
52 | variable. For the hypothetical formula
53 | $${\left(\neg A\right)}\land{\left(B\lor C\right)}$$, upon setting $$A$$ to
54 | true, we know that the formula becomes unsatisfiable, so we can
55 | immediately backtrack and set $$A$$ to false. Then when we set $$B$$ to
56 | true, we can immediately conclude that the formula is satisfiable.
57 |
58 | ## Unit resolution
59 |
60 | Whenever there is a clause with only one literal, then the only way to
61 | satisfy the formula is to set the corresponding variable so that clause
62 | is true. For example, if $${\Delta}$$ consists of
63 | $$\{B\},\{\neg B\lor\neg C\},\{C\lor\neg D\}$$, then we must set B to
64 | true, then set C to false, and finally D to false.
65 |
66 | Another improvement: if a variable appears in the formula only in its
67 | negated form or only in its unnegated form (a pure literal), then we
68 | can set that variable so as to satisfy every clause containing it.
69 |
70 | ## DPLL algorithm
71 |
72 | The DPLL algorithm combines DFS with early stopping, unit propagation
73 | and pure literals. With these heuristics, DPLL is a very effective
74 | backtracking SAT solver. The recursive algorithm for DPLL is as follows.
75 |
76 | $$DPLL(\phi,{\alpha})$$:
77 | 1. If $$\phi\vert{\alpha}$$ is empty, return satisfiable.
78 | 2. If $$\phi\vert{\alpha}$$ contains an empty clause,
79 | return unsatisfiable.
80 | 3. If $$\phi\vert{\alpha}$$ contains a unit clause $$\{p\}$$, return
81 | $$DPLL(\phi,{\alpha}p)$$.
82 | 4. If $$\phi\vert{\alpha}$$ has a pure literal $$p$$, return
83 | $$DPLL(\phi,{\alpha}p)$$.
84 | 5. Let $$p$$ be a literal from a minimum size clause of
85 | $$\phi\vert{\alpha}$$. If $$DPLL(\phi,{\alpha}p)$$ returns satisfiable,
86 | return satisfiable. Else, return $$DPLL(\phi,{\alpha}\neg p)$$.
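
A minimal, unoptimized sketch of this recursion (assuming clauses represented as frozensets of integer literals, and ignoring the bookkeeping needed to report the satisfying assignment itself):

```python
def condition(clauses, lit):
    """Simplify the CNF given that literal lit is assigned true."""
    out = []
    for c in clauses:
        if lit in c:
            continue                     # clause satisfied: drop it
        out.append(c - {-lit})           # remove the now-false literal
    return out

def dpll(clauses):
    if not clauses:
        return True                      # no clauses left: satisfiable
    if any(not c for c in clauses):
        return False                     # empty clause: dead end
    for c in clauses:                    # unit clause rule
        if len(c) == 1:
            return dpll(condition(clauses, next(iter(c))))
    lits = {l for c in clauses for l in c}
    for lit in lits:                     # pure literal rule
        if -lit not in lits:
            return dpll(condition(clauses, lit))
    lit = next(iter(min(clauses, key=len)))   # branch on a minimum-size clause
    return dpll(condition(clauses, lit)) or dpll(condition(clauses, -lit))

# (not A) and (B or C) from the early-stopping example:
print(dpll([frozenset({-1}), frozenset({2, 3})]))   # True
```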
87 |
88 | ## Clause Learning
89 |
90 | Fundamentally, the biggest limitation of a backtracking solver like
91 | DPLL is that it does not learn from its mistakes, so it may make the
92 | same mistakes over and over. For example, suppose the variable ordering
93 | we choose for DPLL is unlucky: the decision we make for the first
94 | variable renders the remaining clauses inconsistent, but unit
95 | propagation cannot detect this, so we end up visiting every
96 | non-satisfying assignment to the remaining variables.
97 |
98 | Clause learning adds clauses that are implied by the knowledge base,
99 | which are discovered during the search process, to the knowledge base.
100 | Adding clauses that are implied by the knowledge base, particularly
101 | short clauses, can empower unit propagation and allow a different way of
102 | searching the tree that is non-chronological.
103 |
104 | We will use a graph-based data structure, called an implication graph,
105 | that has nodes corresponding to variable assignments (either decisions
106 | we make when branching or implications via unit propagation). The
107 | directed edges in the graph will record dependencies among the variable
108 | assignments. Each node in the implication graph has the form $$l/V=v$$
109 | which means that at level $$l$$, variable $$V$$ has been set to value $$v$$.
110 | Whenever we branch on a variable, we create a node for this variable
111 | assignment but do not add any edges. Whenever unit propagation allows us
112 | to derive the value of a variable, we create a node for this variable
113 | assignment, and we add directed edges to this new node from the nodes
114 | corresponding to the variable settings that allowed unit propagation to
115 | make this derivation and label each edge with the clause that was used
116 | to make the derivation.
117 |
118 | When we reach a contradiction, we can build a conflict graph and
119 | identify opportunities to learn from mistakes. In an implication graph
120 | which terminates in a contradiction, every cut that separates the
121 | decision variables from the contradiction defines a conflict set. The
122 | nodes in the conflict set are exactly the source nodes of all of the
123 | (directed) edges that cross the cut.
124 |
125 | Though there may be several conflict sets, a typical approach is to
126 | identify a cut so that there is only one decision variable at the
127 | highest (last) decision level on the reason side and then backtrack to
128 | the second highest (second to last) decision level. Following this
129 | backtracking procedure, the clause that was added is guaranteed to
130 | trigger unit propagation.
131 |
132 | Another approach is to identify a Unique Implication Point (UIP) which
133 | is a node at the highest decision level that appears in every path from
134 | the highest decision level to the contradiction. If there is more than
135 | one UIP, often we choose a UIP that is closest to the contradiction
136 | since this might allow shorter clauses.
137 |
138 | Non-chronological backtracking is sound and complete assuming we keep
139 | all of the clauses that we learned. Note that non-chronological
140 | backtracking might go down the same path multiple times.
141 |
142 | ## Example for DPLL and Clause Learning
143 |
144 | Consider the formula $${\Delta}$$ below:
145 | 1. \$$\{A,B\}$$
146 | 2. \$$\{B,C\}$$
147 | 3. \$$\{\neg A,\neg X,Y\}$$
148 | 4. \$$\{\neg A,X,Z\}$$
149 | 5. \$$\{\neg A,\neg Y,Z\}$$
150 | 6. \$$\{\neg A,X,\neg Z\}$$
151 | 7. \$$\{\neg A,\neg Y,\neg Z\}$$
152 |
153 | 
154 |
155 | *Figure from Chapter 3, page 8 of draft chapter on satisfiability by
156 | Adnan Darwiche*
157 |
158 | For DPLL, assume the variable ordering $$A,B,C,X,Y,Z$$ for the search
159 | procedure and that variables are assigned true before being assigned
160 | false when branching. The result of running DPLL on this formula is
161 | given by the figure above. We see that the DPLL algorithm is forced to
162 | explore almost half of the entire tree before it is able to detect the
163 | contradiction.
164 |
165 | 
166 |
167 | *Figure from Chapter 3, page 11 of draft chapter on satisfiability by
168 | Adnan Darwiche*
169 |
170 | On the other hand, when running CDCL (conflict-driven clause learning)
171 | on the same formula with the same search tree, the solver learns a good
172 | clause to add to the KB from a cut in the implication graph. These
173 | learned clauses allow CDCL to traverse the search tree in a more efficient manner.
174 |
175 | ## Engineering considerations
176 |
177 | Key features of SAT solvers:
178 | 1. Conflict-driven clause learning
179 | 2. Unit propagation with watched literals
180 | 3. Dynamic variable selection/branching heuristic
181 | 4. Random restarts
182 |
183 | An efficient implementation of unit propagation is very important since
184 | a lot of time is spent doing unit propagation. We can implement unit
185 | propagation more efficiently by using a lazy approach. We only watch two
186 | variables in each clause. Every time we assign a variable, we only need
187 | to check the clauses for which that variable is watched. Whenever a
188 | watched variable becomes assigned to true or false, we check each clause
189 | for which that variable is watched: if the clause has become unit, we
190 | propagate (assigning the one remaining literal, which satisfies the
191 | clause); otherwise, we find another unassigned variable in that clause to watch.
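
A sketch of just the indexing idea (simplified: a real solver also moves watches and tracks literal polarities):

```python
from collections import defaultdict

clauses = [[1, 2, 3], [-1, 2], [-2, -3], [3, -1, 2]]
watchers = defaultdict(list)        # literal -> clauses currently watching it
for c in clauses:
    for lit in c[:2]:               # watch the first two literals of each clause
        watchers[lit].append(c)

# Assigning x_1 = True falsifies literal -1, so only the clauses in
# watchers[-1] need to be examined; every other clause is untouched.
print(watchers[-1])                 # [[-1, 2], [3, -1, 2]]
```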
192 |
193 | In what order do we assign the variables and values? We try to keep
194 | track of how important each variable is. One heuristic is that if a
195 | variable appears in many clauses, it’s probably more important.
196 |
197 | Restarts with different random seeds (while keeping learned clauses) can
198 | be very helpful in practice because the runtime distribution of
199 | backtrack search methods exhibits a heavy tail. The size of the search
200 | tree can vary dramatically, depending on the order in which we pick the
201 | variables, so a SAT solver can get lost in a large part of the state
202 | space where there is no solution. If you keep doing restarts, how can
203 | you guarantee completeness? One common approach is to double the restart
204 | cutoff each time, which preserves completeness.
205 |
206 | ## Tutorial on practical SAT solvers
207 |
208 | [MiniSat](http://minisat.se/) is a “minimalistic, open-source SAT
209 | solver, developed to help researchers and developers alike to get
210 | started on SAT.” MiniSat can be installed from their [Github
211 | repository](http://github.com/niklasso/minisat). Mac users can install
212 | MiniSat directly using [Homebrew](https://brew.sh/) with the command
213 | `brew install minisat`. MiniSat usage is
214 | `minisat [options] <input-file> <result-output-file>`, where the options
215 | can be viewed with `minisat --help`.
216 |
217 | Examples of benchmark problems are available
218 | [here](http://www.cs.ubc.ca/~hoos/SATLIB/benchm.html). For example, we
219 | can download
220 | [CBS\_k3\_n100\_m403\_b10](http://www.cs.ubc.ca/~hoos/SATLIB/Benchmarks/SAT/CBS/CBS_k3_n100_m403_b10.tar.gz)
221 | random 3-SAT instances, which have 100 variables, 403 clauses, and
222 | backbone size 10 (1000 instances, all satisfiable). After unzipping the file, we
223 | can solve one of the instances using MiniSat with the command
224 | `minisat CBS_k3_n100_m403_b10_0.cnf`. By viewing this file in a text
225 | editor, we see that the first 3 lines are comments (lines starting with
226 | “c”), the next line is the problem header `p cnf 100 403` representing
227 | `p FORMAT NUM_VARIABLES NUM_CLAUSES`, and each subsequent line
228 | represents a clause where the variables in the disjunction are listed
229 | (negated variables appear with a negative sign and each line is
230 | terminated with a “0”). Variables are assumed to be numbered from 1 to
231 | n. For example, the clause
232 | $${\left(\neg X_{9}\lor X_{40}\lor\neg X_{68}\right)}$$ is represented as
233 | `-9 40 -68 0`. Further descriptions of the file formats are available
234 | [here](http://www.cs.ubc.ca/~hoos/SATLIB/Benchmarks/SAT/satformat.ps).
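
Putting the format together, a complete (hypothetical) DIMACS file encoding the two-clause formula $${\left(x_{1}\lor\neg x_{2}\right)}\land{\left(x_{2}\lor x_{3}\right)}$$ would read:

```
c a tiny example instance
p cnf 3 2
1 -2 0
2 3 0
```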
235 |
--------------------------------------------------------------------------------
/logic/random_walk/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: post
3 | title: 'Inference - random walk satisfiability solvers'
4 | ---
5 |
6 | ## Introduction
7 |
8 | So far, we’ve introduced propositional logic and considered the problem
9 | of deciding whether a propositional formula is satisfiable. The
10 | algorithms we’ve studied so far to decide satisfiability, such as DPLL
11 | and CDCL, are algorithms which conduct a systematic search of the state
12 | space, terminating only when the algorithm has found a satisfying
13 | assignment or a proof that the formula is unsatisfiable.
14 |
15 | In this lecture, we’ll introduce a randomized algorithm for deciding
16 | whether a propositional formula is satisfiable or not, which returns a
17 | correct answer with high probability. Instead of systematically
18 | searching the state space, we’ll conduct a random walk over the state
19 | space which can then be analyzed using Markov chains.
20 |
21 | The setup for the random walk is as follows. Given a formula with $$n$$
22 | Boolean variables, define our state space to consist of all possible
23 | truth assignments to these $$n$$ Boolean variables. By mapping each
24 | truth assignment to a binary string of length $$n$$, our
25 | state space can be equivalently viewed as a Boolean hypercube of
26 | dimension $$n$$, where each of the $$2^{n}$$ vertices corresponds to a truth
27 | assignment. Later, we will set up a randomized search procedure which
28 | does a random walk over the possible states to decide whether a formula
29 | is satisfiable. This type of search is known as a stochastic local
30 | search procedure.
31 |
32 | ## Review of Markov chains
33 |
34 | A Markov chain is a discrete stochastic process $$(X_{n},n\geq0)$$ where
35 | each $$X_{n}\in S$$ ($$S$$ is the state space). A Markov chain satisfies the
36 | Markov property, the key conditional independence relationship: the
37 | future is conditionally independent of the past given the present.
38 | Explicitly, we can write this as
39 | $$P(X_{n+1}=j\mid X_{n}=i,...,X_{0}=i_{0})=P(X_{n+1}=j\mid X_{n}=i)$$ for
40 | all possible states $$j,i_{0},...i$$ and all time steps $$n$$.
41 |
42 | Furthermore, if $$P(X_{n+1}=j\mid X_{n}=i)=P_{ij}$$ holds for all $$n$$,
43 | then the transition probabilities are independent of time, and we call
44 | the Markov chain homogeneous. In this case, we can define the transition
45 | matrix $$P=[P_{ij}]$$, and the full joint probability distribution defined
46 | by the Markov chain is then completely determined by the transition
47 | matrix $$P$$ and the initial probability distribution $$\Pi^{0}$$.
48 |
49 | Graphically, we can represent a Markov chain as a directed graph with
50 | weighted edges. Each node in the graph represents a possible state of
51 | the Markov chain. We have directed edges between nodes, with edge
52 | weights representing the probability of transitioning between the
53 | corresponding states.
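
When the state space is small enough to write down $$P$$ explicitly, simulating the chain is straightforward (a sketch with an assumed 2-state chain); the next section deals with the case where $$P$$ is too large to represent:

```python
import numpy as np

P = np.array([[0.9, 0.1],
              [0.4, 0.6]])            # rows sum to 1
rng = np.random.default_rng(0)

state, path = 0, [0]
for _ in range(10):
    state = rng.choice(2, p=P[state])   # sample the next state from row `state`
    path.append(int(state))
print(path)
```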
54 |
55 | # Random walk algorithm for 2-SAT
56 |
57 | Let the state space $$S=\{0,1\}^{n}$$ be the set of vertices of the
58 | Boolean hypercube. Since the number of possible states is $$2^{n}$$, for
59 | large $$n$$, the state space cannot be enumerated completely, and the
60 | transition matrix $$P$$ cannot be represented explicitly. We want to find
61 | a way to simulate from the Markov chain without representing the Markov
62 | chain explicitly.
63 |
64 | We’ll first consider the following algorithm for solving 2-CNF formulas
65 | (a CNF formula where every clause has at most 2 literals) and then
66 | generalize later.
67 |
68 | Input: a 2-CNF formula with $$n$$ variables
69 | 1. Repeat $$b$$ times:
70 | 1. Pick an arbitrary truth assignment $$x$$
71 | 2. Repeat $$2n^{2}$$ times:
72 | 1. If $$x$$ satisfies all clauses, return satisfiable
73 | 2. Otherwise, pick any clause that is not satisfied and choose
74 | one of the variables uniformly at random from this clause
75 | and flip the truth assignment of that variable
76 | 2. Return unsatisfiable
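
A direct sketch of this procedure (same integer-literal clause representation as before):

```python
import random

def random_walk_2sat(clauses, n, b):
    for _ in range(b):
        x = [random.random() < 0.5 for _ in range(n)]      # arbitrary start
        for _ in range(2 * n * n):
            unsat = [c for c in clauses
                     if not any((l > 0) == x[abs(l) - 1] for l in c)]
            if not unsat:
                return True                            # satisfying assignment found
            lit = random.choice(random.choice(unsat))  # random variable in a violated clause
            x[abs(lit) - 1] = not x[abs(lit) - 1]      # flip it
    return False                                       # report (probably) unsatisfiable

# (x1 or x2) and (not x1 or x2) and (x1 or not x2) has the unique model x1=x2=T:
print(random_walk_2sat([[1, 2], [-1, 2], [1, -2]], n=2, b=10))   # True
```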
77 |
78 | ## Analysis of algorithm for 2-SAT
79 |
80 | If the propositional formula is unsatisfiable, then the algorithm will
81 | always return unsatisfiable. If the formula is satisfiable, then the
82 | algorithm might not find the satisfying assignment. This is the only
83 | case where the algorithm makes a mistake. We’ll bound the error
84 | probability for this case.
85 |
86 | Let $$a=a_{1}...a_{n}$$ be a truth value assignment which satisfies the
87 | CNF formula. Let $$x_{t}$$ be the truth assignment in the $$t^{th}$$
88 | iteration of the inner loop. Define the random variable $$X_{t}$$ to be
89 | the number of bits for which $$x_{t}$$ and $$a$$ match. If $$X_{t}=n$$, then
90 | we’ve found the satisfying assignment $$a$$.
91 |
92 | Note that when we flip any one bit in $$x_{t}$$, $$X_{t}$$ either increases
93 | or decreases by 1. Thus, in the inner loop when we flip the truth
94 | assignment of some variable, if $$X_{t}=j$$, then $$X_{t+1}$$ can only take
95 | on the values $$\{j-1,j+1\}$$.
96 |
97 | 
98 |
99 | To analyze the transition probabilities, note that when we pick a
100 | violated clause in the inner loop, we know that the truth assignment for
101 | at least one of the variables in that clause does not match the
102 | corresponding truth assignment in $$a$$ (because $$a$$ is a satisfying
103 | assignment). In either the case where one of the variables is set
104 | incorrectly or both of the variables are set incorrectly, the following
105 | inequalities hold for the transition probabilities:
106 |
107 | \$$
108 | \begin{eqnarray*}
109 | P[X_{t+1}=j+1\mid X_{t}=j] & \geq & \frac{1}{2}\\
110 | P[X_{t+1}=j-1\mid X_{t}=j] & \leq & \frac{1}{2}
111 | \end{eqnarray*}
112 | \$$
113 | {: style="text-align: center"}
114 |
115 | for any $$1\leq j\leq n-1$$ and all time steps $$t$$. For the boundary
116 | cases, we have that $$P[X_{t+1}=1\mid X_{t}=0]=1$$ and
117 | $$P[X_{t+1}=n\mid X_{t}=n]=1$$ for all $$t$$.
118 |
119 | Thus, we see that the random walk algorithm defines a stochastic process
120 | with a Markov chain over the random variable $$\{X_{t}\}$$, where each
121 | $$X_{t}\in\{0,...,n\}$$. For the purposes of our analysis, we will treat
122 | the transition probabilities as exactly $$\frac{1}{2}$$, which gives us a
123 | bound on the worst-case performance.
124 |
125 | Define $$y_{j}=$$ \# of steps to reach state $$n$$ from state $$j$$, and
126 | define $$h_{j}=E[y_{j}]$$. Note that the variables take on the boundary
127 | values $$h_{n}=0$$ and $$h_{0}=1+h_{1}$$. The expected number of steps are
128 | related via the formula:
129 |
130 | \$$
131 | \begin{eqnarray*}
132 | E[y_{j}] & = & \frac{1}{2}{\left(1+E[y_{j-1}]\right)}+\frac{1}{2}{\left(1+E[y_{j+1}]\right)}\\
133 | h_{j} & = & \frac{h_{j-1}+h_{j+1}}{2}+1
134 | \end{eqnarray*}
135 | \$$
136 | {: style="text-align: center"}
137 |
138 | **Claim 1**: $$h_{j}=h_{j+1}+2j+1$$
139 |
140 | ***Proof***: Define $$f_{j}=h_{j}-h_{j-1}$$. From the recurrence relation
141 | above, we have that $$f_{j+1}=f_{j}-2$$. Since $$f_{1}=-1$$ by the boundary
142 | condition, it follows that $$f_{j+1}=-(2j+1)$$, i.e. $$h_{j}-h_{j+1}=2j+1$$.
143 |
144 | **Claim 2**: $$h_{j}=n^{2}-j^{2}$$
145 |
146 | ***Proof***: Since $$h_{n}=0$$ by the boundary condition, it follows from
147 | repeated application of Claim 1 that $$h_{j}=\sum_{i=j}^{n-1}(2i+1)=n^{2}-j^{2}$$.
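
Both claims can be sanity-checked by solving the hitting-time equations numerically (a small sketch with numpy, here with $$n=10$$):

```python
import numpy as np

n = 10
A = np.zeros((n, n))
b = np.ones(n)
A[0, 0], A[0, 1] = 1, -1            # h_0 - h_1 = 1
for j in range(1, n):
    A[j, j] = 1.0                   # h_j - (h_{j-1} + h_{j+1})/2 = 1
    A[j, j - 1] = -0.5
    if j + 1 < n:
        A[j, j + 1] = -0.5          # h_n = 0, so it drops out of the last row
h = np.linalg.solve(A, b)
print(np.allclose(h, [n**2 - j**2 for j in range(n)]))   # True
```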
148 |
149 | Thus, we see that if the formula is satisfiable, the random walk will
150 | take, in expectation, roughly $$n^{2}$$ steps to find the satisfying
151 | assignment $$a$$. Let $$Z=$$ number of steps the random walk takes before
152 | reaching state $$n$$. By Markov’s inequality,
153 | $$P[Z\geq2n^{2}]\leq\frac{1}{2}$$, so the probability that the random walk
154 | does not find the satisfying assignment $$a$$ in $$2n^{2}$$ steps is
155 | $$\leq\frac{1}{2}$$. By running many independent runs of this algorithm,
156 | the failure probability for this random walk algorithm can be lowered to
157 | at most $${\left(\frac{1}{2}\right)}^{b}$$. Thus, we see that this random
158 | walk algorithm provably solves 2-SAT in polynomial time.
159 |
160 | # Random walk algorithm for 3-SAT
161 |
162 | Last lecture, we described and analyzed a random walk algorithm for
163 | deciding whether or not a 2-CNF formula is satisfiable. Now consider
164 | running the same algorithm presented last time on a 3-CNF formula. We’ll
165 | see later on in this lecture why we choose to repeat the inner loop $$3n$$
166 | times and always generate our truth assignment $$x$$ uniformly at random.
167 |
168 | Input: a 3-CNF formula $$\phi(x_{1},...,x_{n})$$
169 | 1. Repeat $$b$$ times:
170 | 1. Select truth assignment $$x$$ uniformly at random
171 | 2. Repeat $$3n$$ times:
172 | 1. If $$x$$ satisfies all clauses in $$\phi$$, return satisfiable
173 | 2. Otherwise, pick any clause that is not satisfied and choose
174 | one of the variables uniformly at random from this clause
175 | and flip the truth assignment of that variable
176 | 2. Return unsatisfiable
177 |
178 | ## Analysis of algorithm for 3-SAT
179 |
180 | 
181 |
182 | Using a similar analysis as in the last lecture, the bounds for the
183 | transition probabilities become
184 |
185 | \$$
186 | \begin{eqnarray*}
187 | P[X_{t+1}=j+1\mid X_{t}=j] & \geq & \frac{1}{3}\\
188 | P[X_{t+1}=j-1\mid X_{t}=j] & \leq & \frac{2}{3}
189 | \end{eqnarray*}
190 | \$$
191 | {: style="text-align: center"}
192 |
193 | for any $$1\leq j\leq n-1$$ and all time steps $$t$$. For the boundary
194 | cases, we have that $$P[X_{t+1}=1\mid X_{t}=0]=1$$ and
195 | $$P[X_{t+1}=n\mid X_{t}=n]=1$$ for all $$t$$.
196 |
197 | Define $$y_{j}=$$ \# of steps to reach state $$n$$ from state $$j$$, and
198 | define $$h_{j}=E[y_{j}]$$. Note that the variables take on the boundary
199 | values $$h_{n}=0$$ and $$h_{0}=1+h_{1}$$. The expected number of steps are
200 | related via the formula:
201 |
202 | \$$
203 | \begin{eqnarray*}
204 | h_{j} & = & \frac{2}{3}h_{j-1}+\frac{1}{3}h_{j+1}+1\\
205 | \end{eqnarray*}
206 | \$$
207 | {: style="text-align: center"}
208 |
209 | Define $$f_{j}=h_{j}-h_{j-1}$$. From the recurrence relation above, we
210 | have that $$f_{j+1}=2f_{j}-3$$. Since $$f_{1}=-1$$ by the boundary
211 | condition, $$-f_{j}=O(2^{j})$$. Therefore, $$h_{j-1}=h_{j}+O(2^{j})$$, and
212 | this gives us a biased random walk where $$h_{j}=O(2^{n})$$.
213 |
214 | If we work out the terms explicitly, we have that
215 | $$h_{j}=2^{n+2}-2^{j+2}-3(n-j)$$. Note that even
216 | $$h_{n-1}=2^{n+2}-2^{n+1}-3=O(2^{n})$$. This means that even if we were
217 | only one state away from finding the satisfying assignment, the expected
218 | running time is still exponential. This observation motivates the
219 | following modification to our algorithm.
220 |
221 | Suppose that we have a way of starting the Markov chain at state $$n-1$$.
222 | Then instead of simulating the Markov chain for an exponential number of
223 | steps hoping to find the satisfying assignment, the best approach is to
224 | continually restart the Markov chain at state $$n-1$$ and just take 1 step
225 | at a time until we find the satisfying assignment. We will extend this
226 | idea to our algorithm by using very aggressive random restarts and short
227 | random walks.
228 |
229 | To start, recall that our algorithm will select a truth assignment $$x$$
230 | uniformly at random. The probability that our initial assignment will
231 | have $$j$$ of the $$n$$ truth assignments assigned correctly is
232 | $$P(X_{0}=j)=\frac{1}{2^{n}}{n \choose j}$$.
233 |
234 | Consider a short sequence of moves and let us compute the probability
235 | that if we make $$2k+j$$ moves, we have $$k$$ moves to the left and $$k+j$$
236 | moves to the right:
237 | $${2k+j \choose k}{\left(\frac{2}{3}\right)}^{k}{\left(\frac{1}{3}\right)}^{k+j}$$.
238 |
239 | Now suppose we start at state $$n-j$$. Let us work out a lower bound for
240 | $$q_{j}$$, the probability of reaching state $$n$$, starting at state $$n-j$$,
241 | in $$\leq3n$$ moves. In particular, $$q_{j}\geq$$ probability that if we
242 | make $$3j$$ moves, we have $$j$$ moves to the left and $$2j$$ moves to the
243 | right. Using the formula above, we have that
244 | $$q_{j}\geq{3j \choose j}{\left(\frac{2}{3}\right)}^{j}{\left(\frac{1}{3}\right)}^{2j}$$.
245 |
246 | By Stirling’s approximation,
247 | $$n!\sim\sqrt{2\pi n}{\left(\frac{n}{e}\right)}^{n}$$ so
248 |
249 | \$$
250 | \begin{eqnarray*}
251 | {3j \choose j} & = & \frac{(3j)!}{(2j)!j!}\\
252 | & \geq & \frac{c\sqrt{2\pi3j}}{\sqrt{2\pi j}\sqrt{2\pi2j}}{\left(\frac{3j}{e}\right)}^{3j}{\left(\frac{e}{2j}\right)}^{2j}{\left(\frac{e}{j}\right)}^{j}\\
253 | & = & \frac{a}{\sqrt{j}}{\left(\frac{27}{4}\right)}^{j}
254 | \end{eqnarray*}
255 | \$$
256 | {: style="text-align: center"}
257 |
258 | for $$j>0$$. Combining this bound with our previous expression,
259 | $$q_{j}\geq\frac{a}{\sqrt{j}}{\left(\frac{1}{2}\right)}^{j}$$ for $$j>0$$.
260 | The boundary condition is $$q_{0}=1$$.
261 |
262 | Combining everything, the probability that we will reach state $$n$$,
263 | starting from a truth assignment initialized uniformly at random, in
264 | $$\leq3n$$ moves is
265 |
266 | \$$
267 | \begin{eqnarray*}
268 | q & \geq & \sum_{j=0}^{n}P(X_{0}=n-j)q_{j}\\
269 | & \geq & \frac{1}{2^{n}}+\sum_{j=1}^{n}\frac{1}{2^{n}}{n \choose j}{\left(\frac{a}{\sqrt{j}}\frac{1}{2^{j}}\right)}\\
270 | & \geq & \frac{1}{2^{n}}+\frac{1}{2^{n}}\frac{a}{\sqrt{n}}\sum_{j=1}^{n}{n \choose j}\frac{1}{2^{j}}\\
271 | & \approx & \frac{a}{\sqrt{n}}{\left(\frac{3}{4}\right)}^{n}
272 | \end{eqnarray*}
273 | \$$
274 | {: style="text-align: center"}
275 |
276 | Therefore, the expected number of times that we have to repeat the
277 | procedure before getting a success is
278 | $$\frac{1}{q}\leq\frac{\sqrt{n}}{a}{\left(\frac{4}{3}\right)}^{n}$$.
279 | Choose $$b=\frac{2}{q}$$. Then our algorithm succeeds with probability at
280 | least $$1/2$$ with running time
281 | $$O{\left(\big(\frac{4}{3}\big)^{n}\right)}$$. We can finally boost our
282 | algorithm to the desired success probability. In conclusion, we’ve seen
283 | that we can go from a biased random walk with $$h_{j}=O(2^{n})$$ to an
284 | expected run time of $$O{\left(\big(\frac{4}{3}\big)^{n}\right)}$$ by
285 | using very aggressive random restarts.
286 |
287 | # Other variants
288 |
289 | ## WalkSAT
290 |
291 | WalkSAT is similar to the random walk algorithm described above. The
292 | difference is that when trying to fix a violated clause, WalkSAT will be
293 | greedy with some probability.
294 |
295 | After selecting a violated clause $$c$$,
296 | 1. With probability $$p$$, pick a variable in $$c$$ at random and flip its
297 | truth assignment
298 | 2. Otherwise, go through all the variables in $$c$$ and flip the variable
299 | with the smallest break count (the number of currently satisfied
300 | clauses that become unsatisfied when that variable is flipped).
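
A sketch of a single WalkSAT flip under these rules (same clause representation as before; `p` is the noise parameter):

```python
import random

def sat(clause, x):
    return any((lit > 0) == x[abs(lit) - 1] for lit in clause)

def break_count(v, x, clauses):
    y = x[:]; y[v - 1] = not y[v - 1]
    return sum(sat(c, x) and not sat(c, y) for c in clauses)

def walksat_flip(c, x, clauses, p=0.5):
    candidates = [abs(lit) for lit in c]
    if random.random() < p:
        v = random.choice(candidates)                                  # random move
    else:
        v = min(candidates, key=lambda u: break_count(u, x, clauses))  # greedy move
    x[v - 1] = not x[v - 1]
```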
301 |
302 | ## GSAT
303 | 1. With probability $$p$$, choose a variable at random and flip the truth
304 | assignment
305 | 2. Otherwise, check all variables $$x_{1}...x_{n}$$ and select the
306 | variable with largest $${\Delta}=$$ make count - break count
307 |
308 | ## Simulated annealing
309 | 1. Randomly pick a variable and calculate the change in energy (the
310 | number of unsatisfied clauses) that flipping it would cause:
311 | $${\Delta}E=$$ break count - make count
312 | 2. If the new state is a better state ($${\Delta}E\leq0$$, lower energy),
313 | always make the transition
314 | 3. Otherwise, accept the transition with probability
315 | $$p=\exp{\left(\frac{-{\Delta}E}{T}\right)}$$ where $$T$$ is the temperature parameter
316 |
317 | Note that the randomness helps the algorithm not get trapped in local
318 | minima.
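
The acceptance rule, as a one-function sketch (with $${\Delta}E$$ the increase in the number of unsatisfied clauses caused by the flip):

```python
import math, random

def accept(delta_e, temperature):
    if delta_e <= 0:
        return True                                   # downhill moves always accepted
    return random.random() < math.exp(-delta_e / temperature)
```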
319 |
--------------------------------------------------------------------------------
/css/tufte.scss:
--------------------------------------------------------------------------------
1 | ---
2 | # this ensures Jekyll reads the file to be transformed into CSS later
3 | # only Main files contain this front matter, not partials.
4 | nav_exclude: true
5 | ---
6 | /*****************************************************************************/
7 | /*
8 | /* Tufte Jekyll blog theme
9 | /* Based on Tufte CSS by Dave Liepmann ( https://github.com/edwardtufte/tufte-css )
10 | /*
11 | /* The README.md will show you how to set up your site along with other goodies
12 | /*****************************************************************************/
13 |
14 | // Imports to create final
15 |
16 | @import "../_sass/fonts";
17 | @import "../_sass/settings";
18 | @import "../_sass/syntax-highlighting";
19 |
20 | // Global Resets
21 | //
22 | * { margin: 0; padding: 0; }
23 |
24 | /* clearfix hack after Cederholm (group class name) */
25 | .group:after {
26 | content: "";
27 | display: table;
28 | clear: both;
29 | }
30 |
31 | html, body { height: 100%; }
32 |
33 | // First significant deviation from CSS on tufte.css site - variable font size as browser width expands or contracts
34 | // html { font-size: 15px; }
35 |
36 | html{
37 | text-align: baseline;
38 | font-size: 11px;
39 | -webkit-font-smoothing: antialiased;
40 | -moz-osx-font-smoothing: grayscale;
41 |
42 | }
43 |
44 | @media screen and (min-width: 800px){ html{ font-size: 12px;} }
45 |
46 | @media screen and (min-width: 900px){ html{ font-size: 13px;} }
47 |
48 | @media screen and (min-width: 1000px){ html{ font-size: 14px;} }
49 |
50 | @media screen and (min-width: 1100px){ html{ font-size: 15px; } }
51 |
52 | // @media screen and (min-width: 1200px){ html{ font-size: 16px; } }
53 | //
54 | // @media screen and (min-width: 1300px){ html{ font-size: 17px; } }
55 |
56 | // sets link style according to values in _settings.scss
57 | .mathblock{
58 | font-size: 1.5rem;
59 | }
60 | a {
61 | color: $contrast-color;
62 | text-decoration: none;
63 | }
64 |
65 | // p > a { @if $link-style == underline
66 | // {
67 | // color: $text-color;
68 | // text-decoration: none;
69 | // border-bottom: 1px solid #777;
70 | // padding-bottom: 1px;
71 | // }
72 | // @else
73 | // {
74 | // color: $contrast-color;
75 | // text-decoration: none;
76 | // }
77 | // }
78 |
79 | /* Links: replicate underline that clears descenders */
80 | p > a:link, p > a:visited { color: inherit; }
81 |
82 | p > a:link { text-decoration: none;
83 | background: -webkit-linear-gradient(#fffff8, #fffff8), -webkit-linear-gradient(#fffff8, #fffff8), -webkit-linear-gradient(#333, #333);
84 | background: linear-gradient(#fffff8, #fffff8), linear-gradient(#fffff8, #fffff8), linear-gradient(#333, #333);
85 | -webkit-background-size: 0.05em 1px, 0.05em 1px, 1px 1px;
86 | -moz-background-size: 0.05em 1px, 0.05em 1px, 1px 1px;
87 | background-size: 0.05em 1px, 0.05em 1px, 1px 1px;
88 | background-repeat: no-repeat, no-repeat, repeat-x;
89 | text-shadow: 0.03em 0 #fffff8, -0.03em 0 #fffff8, 0 0.03em #fffff8, 0 -0.03em #fffff8, 0.06em 0 #fffff8, -0.06em 0 #fffff8, 0.09em 0 #fffff8, -0.09em 0 #fffff8, 0.12em 0 #fffff8, -0.12em 0 #fffff8, 0.15em 0 #fffff8, -0.15em 0 #fffff8;
90 | background-position: 0% 93%, 100% 93%, 0% 93%; }
91 |
92 | @media screen and (-webkit-min-device-pixel-ratio: 0) { p > a:link { background-position-y: 87%, 87%, 87%; } }
93 |
94 | p > a:link::selection { text-shadow: 0.03em 0 #b4d5fe, -0.03em 0 #b4d5fe, 0 0.03em #b4d5fe, 0 -0.03em #b4d5fe, 0.06em 0 #b4d5fe, -0.06em 0 #b4d5fe, 0.09em 0 #b4d5fe, -0.09em 0 #b4d5fe, 0.12em 0 #b4d5fe, -0.12em 0 #b4d5fe, 0.15em 0 #b4d5fe, -0.15em 0 #b4d5fe;
95 | background: #b4d5fe; }
96 |
97 | p > a:link::-moz-selection { text-shadow: 0.03em 0 #b4d5fe, -0.03em 0 #b4d5fe, 0 0.03em #b4d5fe, 0 -0.03em #b4d5fe, 0.06em 0 #b4d5fe, -0.06em 0 #b4d5fe, 0.09em 0 #b4d5fe, -0.09em 0 #b4d5fe, 0.12em 0 #b4d5fe, -0.12em 0 #b4d5fe, 0.15em 0 #b4d5fe, -0.15em 0 #b4d5fe;
98 | background: #b4d5fe; }
99 |
100 | body { width: 87.5%;
101 | margin-left: auto;
102 | margin-right: auto;
103 | padding-left: 12.5%;
104 | font-family: et-book, Palatino, "Palatino Linotype", "Palatino LT STD", "Book Antiqua", Georgia, serif;
105 | background-color: $bg-color;
106 | color: $text-color;
107 | max-width: 1400px;
108 | counter-reset: sidenote-counter; }
109 |
110 | // --------- Typography stuff -----------//
111 | // added rational line height and margins ala http://webtypography.net/intro/
112 |
113 | h1 { font-weight: 400;
114 | margin-top: 1.568rem;
115 | margin-bottom: 1.568rem;
116 | font-size: 2.5rem;
117 | line-height: 0.784; }
118 |
119 | // h2 { font-style: italic;
120 | // font-weight: 400;
121 | // margin-top: 1.866666666666667rem;
122 | // margin-bottom: 0;
123 | // font-size: 2.1rem;
124 | // line-height: 0.933333333333333; }
125 | //
126 | // h3 { font-style: italic;
127 | // font-weight: 400;
128 | // font-size: 1.8rem;
129 | // margin-top: 2.1777777777777778rem;
130 | // margin-bottom: 0;
131 | // line-height: 1.08888888888889; }
132 |
133 | // h1 { font-weight: 400;
134 | // margin-top: 4rem;
135 | // margin-bottom: 1.5rem;
136 | // font-size: 3.2rem;
137 | // line-height: 1; }
138 |
139 | h2 { font-style: italic;
140 | font-weight: 400;
141 | margin-top: 4rem;
142 | margin-bottom: 1rem;
143 | font-size: 2.2rem;
144 | line-height: 1; }
145 |
146 | h3 { font-style: italic;
147 | font-weight: 400;
148 | font-size: 1.7rem;
149 | margin-top: 2rem;
150 | margin-bottom: 0;
151 | line-height: 1; }
152 |
153 | // ET says a need for more than 3 levels of headings is the sign of a diseased mind
154 |
155 | // p .subtitle { font-style: italic;
156 | // margin-top: 2.1777777777777778rem;
157 | // margin-bottom: 2.1777777777777778rem;
158 | // font-size: 1.8rem;
159 | // display: block;
160 | // line-height: 1.08888888888889; }
161 |
162 | p.subtitle { font-style: italic;
163 | margin-top: 1rem;
164 | margin-bottom: 1rem;
165 | font-size: 1.8rem;
166 | display: block;
167 | line-height: 1; }
168 |
169 | p, ol, ul { font-size: 1.4rem; }
170 |
171 | // p, li { line-height: 2rem;
172 | // margin-top: 1.4rem;
173 | // padding-right: 2rem; //removed because, why?
174 | // vertical-align: baseline; }
175 |
176 | p { line-height: 2rem;
177 | margin-top: 1.4rem;
178 | margin-bottom: 1.4rem;
179 | padding-right: 0;
180 | vertical-align: baseline; }
181 |
182 | // blockquote p { font-size: 1.1rem;
183 | // line-height: 1.78181818;
184 | // margin-top: 1.78181818rem;
185 | // margin-bottom: 1.78181818rem;
186 | // width: 45%;
187 | // padding-left: 2.5%;
188 | // padding-right: 2.5%; }
189 | //
190 | // blockquote footer { width: 45%;
191 | // text-align: right; }
192 |
193 | blockquote { font-size: 1.4rem; }
194 |
195 | blockquote p { width: 50%; }
196 |
197 | blockquote footer { width: 50%;
198 | font-size: 1.1rem;
199 | text-align: right; }
200 |
201 |
202 | .sans { font-family: $sans-font;
203 | letter-spacing: .03em; }
204 |
205 | pre, pre code, p code, p pre code { font-family: $code-font; // removed .code 'class' since code is an actual html tag
206 | font-size: 1.2rem; // also added p code, p pre code and pre selector to account for Markdown parsing
207 | line-height: 1.71428571; // of triple backticks plus rationalized line-heights and margins
208 | margin-top: 1.71428571rem; }
209 |
210 |
211 | h1 code, h2 code, h3 code { font-size: 0.80em; } //left in for no real reason
212 |
213 | /*-- Table styling section - For Tufte-Jekyll, booktabs style is default for Markdown tables --*/
214 |
215 | table, table.booktabs { width: auto; //making booktabs style tables the unstyled default in case someone uses Markdown styling
216 | margin: 0 auto;
217 | border-spacing: 0px;
218 | border-top: 2px solid $border-color;
219 | border-bottom: 2px solid $border-color; }
220 |
221 | .booktabs th, th { border-bottom: 1px solid $border-color;
222 | padding: 0.65ex 0.5em 0.4ex 0.5em;
223 | font-weight: normal;
224 | text-align: center; }
225 |
226 | th, td{ font-size: 1.2rem;
227 | line-height: 1.71428571; }
228 |
229 | .booktabs th.cmid, th { border-bottom: 1px solid lighten($border-color, 25%); }
230 |
231 | .booktabs th.nocmid { border-bottom: none; }
232 |
233 | .booktabs tbody tr:first-child td, tr:first-child td { padding-top: 0.65ex; } /* add space between thead row and tbody */
234 |
235 | .booktabs td, td { padding-left: 0.5em;
236 | padding-right: 0.5em;
237 | text-align: left; }
238 |
239 | .booktabs caption, caption { font-size: 90%;
240 | text-align: left;
241 | width: auto;
242 | margin-left: auto;
243 | margin-right: auto;
244 | margin-top: 1ex;
245 | caption-side: top; }
246 |
247 | .booktabs tbody tr td.l { text-align: left !important; }
248 | .booktabs tbody tr td.c { text-align: center !important; }
249 | .booktabs tbody tr td.r { text-align: right !important; }
250 |
251 | .table-caption { float:right;
252 | clear:right;
253 | margin-right: -60%;
254 | width: 50%;
255 | margin-top: 0;
256 | margin-bottom: 0;
257 | font-size: 1.0rem;
258 | line-height: 1.96; }
259 | /* -- End of Table Styling section --*/
260 |
261 |
262 | /* Basic Layout stuff --*/
263 |
264 | // article { position: relative;
265 | // padding: 1
266 | //
267 | // rem 0rem 2.5rem 0rem; } // reduced top and bottom padding by 50%
268 | //
269 | // section { padding-top: 1rem;
270 | // padding-bottom: 1rem; }
271 |
272 | article { position: relative;
273 | padding: 5rem 0rem; }
274 |
275 | section { padding-top: 1rem;
276 | padding-bottom: 1rem; }
277 |
278 | p, ol, ul { font-size: 1.4rem; }
279 |
280 | ul { width: 87.5%;
281 | -webkit-padding-start: 5%;
282 | -webkit-padding-end: 5%;
283 | list-style-type: none; }
284 |
285 | ol { width: 87.5%;
286 | -webkit-padding-start: 5%;
287 | -webkit-padding-end: 5%; }
288 |
289 | ul li { padding: 0.5em 0; } //vertical padding on list items screws up vertical rhythym
290 |
291 | figure, figure img.maincolumn { max-width: 55%;
292 | -webkit-margin-start: 0;
293 | -webkit-margin-end: 0;
294 | margin-bottom: 3em; }
295 |
296 | figcaption { float: right;
297 | clear: right;
298 | margin-right: -48%;
299 | margin-top: 0;
300 | margin-bottom: 0;
301 | font-size: 1.0rem;
302 | line-height: 1.6;
303 | vertical-align: baseline;
304 | position: relative;
305 | max-width: 40%; }
306 |
307 | figure.fullwidth figcaption { float: left; margin-right: 0%; margin-left: 36%; }
308 |
309 | img { max-width: 100%; }
310 |
311 | .sidenote, .marginnote { float: right;
312 | clear: right;
313 | margin-right: -60%;
314 | width: 50%;
315 | margin-top: 0;
316 | margin-bottom: 1.96rem;
317 | font-size: 1.0rem;
318 | line-height: 1.96; //changed to bring line heights into rational pattern
319 | vertical-align: baseline;
320 | position: relative; }
321 |
322 | li .sidenote, li .marginnote{ margin-right: -80%; } //added to allow for the fact that lists are indented and marginnotes and sidenotes push to right
323 |
324 | blockquote .sidenote, blockquote .marginnote { margin-right: -79% }
325 |
326 | .sidenote-number { counter-increment: sidenote-counter; }
327 |
328 | .sidenote-number:after, .sidenote:before { content: counter(sidenote-counter) " ";
329 | font-family: et-book-roman-old-style;
330 | color: $contrast-color; //added color
331 | position: relative;
332 | vertical-align: baseline; }
333 |
334 | .sidenote-number:after { content: counter(sidenote-counter);
335 | font-size: 1rem;
336 | top: -0.5rem;
337 | left: 0.1rem; }
338 |
339 | .sidenote:before { content: counter(sidenote-counter) ".\000a0"; // this is unicode for a non-breaking space
340 | color: $contrast-color;
341 | top: 0rem; } //removed superscripting for numerical reference in sidenote
342 |
343 | p, footer, div.table-wrapper, div.mathblock { width: 55%; }
344 |
345 | div.table-wrapper { overflow-x: auto; } //changed all overflow values to 'auto' so scroll bars appear only as needed
346 |
347 | @media screen and (max-width: 760px) { p, footer,div.mathblock { width: 90%; }
348 | pre code { width: 87.5%; }
349 | ul, ol { width: 85%; }
350 | figure { max-width: 90%; }
351 | figcaption, figure.fullwidth figcaption { margin-right: 0%;
352 | max-width: none; }
353 | blockquote p, blockquote footer { width: 80%;
354 | padding-left: 5%;
355 | padding-right: 5%;
356 | }}
357 |
358 | .marginnote code, .sidenote code { font-size: 1rem; } //more .code class removal
359 |
360 | pre, pre code, p pre code { width: 52.5%;
361 | padding-left: 2.5%;
362 | overflow-x: auto; }
363 |
364 | .fullwidth, li.listing div{ max-width: 90%; }
365 |
366 | .full-width { .sidenote, .sidenote-number, .marginnote { display: none; } }
367 |
368 | span.newthought { font-variant: small-caps;
369 | font-size: 1.2em;
370 | letter-spacing: 0.05rem; }
371 |
372 | input.margin-toggle { display: none; }
373 |
374 | label.sidenote-number { display: inline; }
375 |
376 | label.margin-toggle:not(.sidenote-number) { display: none; }
377 |
378 | @media (max-width: 760px) { label.margin-toggle:not(.sidenote-number) { display: inline; color: $contrast-color; }
379 | .sidenote, .marginnote { display: none; }
380 | .margin-toggle:checked + .sidenote,
381 | .margin-toggle:checked + .marginnote { display: block;
382 | float: left;
383 | left: 1rem;
384 | clear: both;
385 | width: 95%;
386 | margin: 1rem 2.5%;
387 | vertical-align: baseline;
388 | position: relative; }
389 | label { cursor: pointer; }
390 | pre, pre code, p code, p pre code { width: 90%;
391 | padding: 0; }
392 | .table-caption { display: block;
393 | float: right;
394 | clear: both;
395 | width: 98%;
396 | margin-top: 1rem;
397 | margin-bottom: 0.5rem;
398 | margin-left: 1%;
399 | margin-right: 1%;
400 | vertical-align: baseline;
401 | position: relative; }
402 | div.table-wrapper, table, table.booktabs { width: 85%; }
403 | div.table-wrapper { border-right: 1px solid #efefef; }
404 | img { max-width: 100%; } }
405 | /*--- End of Basic Layout stuff from tufte.css ---*/
406 |
407 | /* -- Jekyll specific styling --*/
408 | //helper classes
409 |
410 | .contrast { color: $contrast-color;}
411 | .smaller { font-size: 80%;}
412 | //Nav and Footer styling area
413 |
414 | header > nav.group, body footer {
415 | width: 95%;
416 | padding-top: 2rem;
417 | }
418 |
419 | nav.group a.active:before{ content:"\0003c\000a0";} // escaped unicode for the carats and then a space on active menu links
420 | nav.group a.active:after{ content:"\000a0\0003e" ;}
421 |
422 | header > nav a{
423 | font-size: 1.2rem;
424 | font-family: $sans-font;
425 | letter-spacing: 0.15em;
426 | text-transform: uppercase;
427 | color: $contrast-color;
428 | padding-top: 1.5rem;
429 | text-decoration: none;
430 | display: inline-block;
431 | float: left;
432 | margin-top: 0;
433 | margin-bottom: 0;
434 | padding-right: 2rem;
435 | //margin-left: 1rem;
436 | vertical-align: baseline;
437 | }
438 | header > nav a img{
439 | height: 5rem;
440 | position: relative;
441 | max-width: 100%;
442 | top:-1.5rem;
443 | }
444 | ul.footer-links, .credits{
445 | list-style: none;
446 | text-align: center;
447 | margin: 0 auto;
448 | }
449 | ul.footer-links li{
450 | display: inline;
451 | padding: 0.5rem 0.25rem;
452 | }
453 | .credits{
454 | padding: 1rem 0rem;
455 | }
456 |
457 | //change font color for credit links in footer
458 |
459 | .credits{
460 | font-family: $sans-font;
461 | & a{
462 | color: $contrast-color;
463 | }
464 | }
465 |
466 | // End of Nav and Footer styling area
467 |
468 | //Full width page styling stuff
469 |
470 | body.full-width, .content-listing, li.listing{ width: 90%;
471 | margin-left: auto;
472 | margin-right: auto;
473 | padding: 0% 5%;
474 |
475 | }
476 | ul.content-listing, ol.content-listing{ width: 85%;
477 | margin-left: 0;
478 | margin-right: 0;
479 | padding: 0% 5%;
480 |
481 | }
482 | .full-width article p{
483 | width: 90%;
484 | }
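// These rules assume the page layout puts a "full-width" class on <body>
// (the usual convention in Tufte-style Jekyll themes), so only pages that
// opt in are widened.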
485 |
486 |
487 | h1.content-listing-header{
488 | font-style: normal;
489 | text-transform: uppercase;
490 | letter-spacing: 0.2rem;
491 | font-size: 1.8rem;
492 | }
493 |
494 | li.listing hr{
495 | width:100%;
496 | }
497 | .listing, .listing h3
498 | {
499 | display: inline-block;
500 | margin:0;
501 | }
502 | li.listing {
503 | margin:0;
504 | & p{
505 | width: 100%;
506 | }
507 | }
508 |
509 |
510 | li.listing:last-of-type{
511 | border-bottom: none;
512 | margin-bottom: 1.4rem;
513 | }
514 | li.listing h3.new {
515 | text-transform: uppercase;
516 | font-style: normal;
517 | }
518 | hr.slender {
519 | border: 0;
520 | height: 1px;
521 | margin-top: 1.4rem;
522 | margin-bottom:1.4rem;
523 | background-image: -webkit-linear-gradient(left, rgba(0,0,0,0), rgba(0,0,0,0.75), rgba(0,0,0,0));
524 | background-image: -moz-linear-gradient(left, rgba(0,0,0,0), rgba(0,0,0,0.75), rgba(0,0,0,0));
525 | background-image: -ms-linear-gradient(left, rgba(0,0,0,0), rgba(0,0,0,0.75), rgba(0,0,0,0));
526 | background-image: -o-linear-gradient(left, rgba(0,0,0,0), rgba(0,0,0,0.75), rgba(0,0,0,0));
527 | background-image: linear-gradient(to right, rgba(0,0,0,0), rgba(0,0,0,0.75), rgba(0,0,0,0)); } // standard unprefixed syntax last, so modern browsers use it and the prefixed lines act as fallbacks
528 | // End of front listing page stuff
529 |
530 |
531 | // Printing and screen media queries
532 |
533 | // Does not display a print-footer for screen display
534 | @media screen{
535 | .print-footer{
536 | display: none;
537 | }
538 | }
539 |
540 | //printing stuff
541 | @media print {
542 | *,
543 | *:before,
544 | *:after {
545 | background: transparent !important;
546 | color: #000 !important; // black prints faster: http://www.sanbeiji.com/archives/953
547 | box-shadow: none !important;
548 | text-shadow: none !important;
549 | }
550 | @page {
551 | margin: 0.75in 0.5in 0.75in 0.5in;
552 | orphans:4; widows:2;
553 | }
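// "orphans: 4; widows: 2" asks the printer to leave at least four lines of a
// split paragraph at the bottom of a page and at least two at the top of the next.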
554 |
555 | body {
556 | font-size: 12pt;
557 |
558 | }
559 | html body span.print-footer{
560 | font-family: $sans-font;
561 | font-size: 9pt;
562 | margin-top: 22.4pt;
563 | padding-top: 4pt;
564 | border-top: 1px solid #000;
565 | }
566 |
567 | thead {
568 | display: table-header-group;
569 | }
570 |
571 | tr,
572 | img {
573 | page-break-inside: avoid;
574 | }
575 |
576 | img {
577 | max-width: 100% !important;
578 | }
579 |
580 | p,
581 | h2,
582 | h3 {
583 | orphans: 4;
584 | widows: 4;
585 | }
586 | article h2, article h3, article h4, article h5 { page-break-after: avoid; } // plain heading selectors; the original descendant pairs (e.g. "article h2 h3") could never match, since headings do not nest
587 | page-break-after: avoid;
588 | }
589 |
590 | body header , body footer {
591 | display:none;
592 | }
593 | }
594 |
595 |
596 | /* --- Icomoon icon fonts CSS --*/
597 | @font-face {
598 | font-family: 'icomoon';
599 | src:url('../fonts/icomoon.eot?rgwlb8');
600 | src:url('../fonts/icomoon.eot?#iefixrgwlb8') format('embedded-opentype'),
601 | url('../fonts/icomoon.woff?rgwlb8') format('woff'),
602 | url('../fonts/icomoon.ttf?rgwlb8') format('truetype'),
603 | url('../fonts/icomoon.svg?rgwlb8#icomoon') format('svg');
604 | font-weight: normal;
605 | font-style: normal;
606 | }
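// The "?rgwlb8" query strings above are cache-busting tokens from the icomoon
// export, and "?#iefix" is the usual workaround for old IE mis-parsing a
// multi-format src list (here with the export token appended).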
607 |
608 | [class^="icon-"], [class*=" icon-"] {
609 | font-family: 'icomoon';
610 | speak: none;
611 | font-style: normal;
612 | font-weight: normal;
613 | font-variant: normal;
614 | text-transform: none;
615 | line-height: 1;
616 | color: $contrast-color;
617 |
618 | /* Better Font Rendering =========== */
619 | -webkit-font-smoothing: antialiased;
620 | -moz-osx-font-smoothing: grayscale;
621 | }
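// Usage sketch (markup assumed, not from this repo): any class beginning with
// "icon-" picks up the icomoon face, and the :before rules below map each
// name to a private-use glyph, e.g.
//
//   <a href="#"><span class="icon-github"></span> Source on Github</a>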
622 |
623 | .icon-pencil:before {
624 | content: "\e600";
625 | }
626 | .icon-film:before {
627 | content: "\e60f";
628 | }
629 | .icon-calendar:before {
630 | content: "\e601";
631 | }
632 | .icon-link:before {
633 | content: "\e602";
634 | }
635 | .icon-info:before {
636 | content: "\e603";
637 | }
638 | .icon-cancel-circle:before {
639 | content: "\e604";
640 | }
641 | .icon-checkmark-circle:before {
642 | content: "\e605";
643 | }
644 | .icon-spam:before {
645 | content: "\e606";
646 | }
647 | .icon-mail:before {
648 | content: "\e607";
649 | }
650 | .icon-googleplus:before {
651 | content: "\e608";
652 | }
653 | .icon-facebook:before {
654 | content: "\e609";
655 | }
656 | .icon-twitter:before {
657 | content: "\e60a";
658 | }
659 | .icon-feed:before {
660 | content: "\e60b";
661 | }
662 | .icon-flickr:before {
663 | content: "\e60c";
664 | }
665 | .icon-github:before {
666 | content: "\e60d";
667 | }
668 | .icon-box-add:before {
669 | content: "\e60e";
670 | }
671 | /*-- End of Icomoon icon font section --*/
672 |
--------------------------------------------------------------------------------