├── .gitignore
├── .gitmodules
├── BayesStack
    ├── DirMulti.hs
    ├── Dirichlet.hs
    ├── Gibbs.hs
    ├── Gibbs
    │   ├── Concurrent.hs
    │   └── Simple.hs
    ├── TupleEnum.hs
    ├── Types.hs
    └── UniqueKey.hs
├── Data
    ├── Binary
    │   └── EnumMap.hs
    ├── Random
    │   └── Sequence.hs
    └── Sequence
    │   └── Chunk.hs
├── LICENSE
├── README.mkd
├── Setup.hs
├── SharedTasteTest.hs
├── bayes-stack.cabal
├── doc
    ├── installation.markdown
    └── usage.markdown
└── network-topic-models
    ├── BayesStack
        └── Models
        │   └── Topic
        │       ├── CitationInfluence.hs
        │       ├── CitationInfluenceNoTopics.hs
        │       ├── LDA.hs
        │       ├── LDARelevance.hs
        │       ├── SharedTaste.hs
        │       ├── Test.hs
        │       └── Types.hs
    ├── BenchLDA.hs
    ├── BenchST.hs
    ├── Benchmark.hs
    ├── DumpCI.hs
    ├── DumpCINT.hs
    ├── DumpLDA.hs
    ├── DumpLDARelevance.hs
    ├── DumpST.hs
    ├── FormatMultinom.hs
    ├── LICENSE
    ├── README.mkd
    ├── ReadData.hs
    ├── ReadRelevanceData.hs
    ├── RunCI.hs
    ├── RunCINT.hs
    ├── RunLDA.hs
    ├── RunLDARelevance.hs
    ├── RunST.hs
    ├── RunSampler.hs
    ├── SerializeText.hs
    ├── Setup.hs
    ├── TopicLanguageModel.hs
    ├── doc
        └── models
        │   ├── model_ci.tex
        │   ├── model_ci_nt.tex
        │   ├── model_lda.tex
        │   ├── model_st.tex
        │   ├── models.tex
        │   └── tikzlibrarybayesnet.code.tex
    └── network-topic-models.cabal


/.gitignore:
--------------------------------------------------------------------------------
1 | *.*~
2 | *.o
3 | *.hi
4 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "network-topic-models/doc/models/tikz-bayesnet"]
2 | 	path = network-topic-models/doc/models/tikz-bayesnet
3 | 	url = git://github.com/jluttine/tikz-bayesnet.git
4 | 


--------------------------------------------------------------------------------
/BayesStack/DirMulti.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE TypeFamilies, FlexibleInstances, ConstraintKinds, DeriveGeneric, DefaultSignatures #-}
  2 | 
  3 | module BayesStack.DirMulti ( -- * Dirichlet/multinomial pair
  4 |                              Multinom, dirMulti, symDirMulti, multinom
  5 |                              -- | Do not do record updates with these
  6 |                            , dmTotal, dmAlpha, dmDomain
  7 |                            , setMultinom, SetUnset (..)
  8 |                            , addMultinom, subMultinom
  9 |                            , decMultinom, incMultinom
 10 |                            , prettyMultinom
 11 |                            , updatePrior
 12 |                            , obsProb
 13 |                              -- * Parameter estimation
 14 |                            , estimatePrior, reestimatePriors, reestimateSymPriors
 15 |                              -- * Convenience functions
 16 |                            , probabilities, decProbabilities
 17 |                            ) where
 18 | 
 19 | import Data.EnumMap (EnumMap)
 20 | import qualified Data.EnumMap as EM
 21 | 
 22 | import Data.Sequence (Seq)
 23 | import qualified Data.Sequence as SQ
 24 | 
 25 | import qualified Data.Foldable as Foldable
 26 | import Data.Foldable (toList, Foldable, foldMap)
 27 | import Data.Function (on)
 28 | 
 29 | import Text.PrettyPrint
 30 | import Text.Printf
 31 | 
 32 | import GHC.Generics (Generic)
 33 | import Data.Binary
 34 | import Data.Binary.EnumMap ()
 35 | 
 36 | import BayesStack.Types
 37 | import BayesStack.Dirichlet
 38 | 
 39 | import Numeric.Log hiding (sum)
 40 | import Numeric.Digamma
 41 | import Math.Gamma hiding (p)
 42 | 
 43 | -- | Make error handling a bit easier
 44 | checkNaN :: RealFloat a => String -> a -> a
 45 | checkNaN loc x | isNaN x = error $ "BayesStack.DirMulti."++loc++": Not a number"
 46 | checkNaN loc x | isInfinite x = error $ "BayesStack.DirMulti."++loc++": Infinity"
 47 | checkNaN _ x = x
 48 | 
 49 | maybeInc, maybeDec :: (Num a, Eq a) => Maybe a -> Maybe a
 50 | maybeInc Nothing = Just 1
 51 | maybeInc (Just n) = Just (n+1)
 52 | maybeDec Nothing = error "Can't decrement zero count"
 53 | maybeDec (Just 1) = Nothing
 54 | maybeDec (Just n) = Just (n-1)
 55 | 
 56 | {-# INLINEABLE decMultinom #-}
 57 | {-# INLINEABLE incMultinom #-}
 58 | decMultinom, incMultinom :: (Num w, Eq w, Ord a, Enum a)
 59 |                          => a -> Multinom w a -> Multinom w a
 60 | decMultinom k = subMultinom 1 k
 61 | incMultinom k = addMultinom 1 k
 62 | 
 63 | subMultinom, addMultinom :: (Num w, Eq w, Ord a, Enum a)
 64 |                          => w -> a -> Multinom w a -> Multinom w a
 65 | subMultinom w k dm = dm { dmCounts = EM.alter maybeDec k $ dmCounts dm
 66 |                         , dmTotal = dmTotal dm - w }
 67 | addMultinom w k dm = dm { dmCounts = EM.alter maybeInc k $ dmCounts dm
 68 |                         , dmTotal = dmTotal dm + w }
 69 | 
 70 | data SetUnset = Set | Unset
 71 | 
 72 | setMultinom :: (Num w, Eq w, Enum a, Ord a) => SetUnset -> a -> Multinom w a -> Multinom w a
 73 | setMultinom Set   s = incMultinom s
 74 | setMultinom Unset s = decMultinom s
 75 | 
 76 | -- | 'Multinom a' represents multinomial distribution over domain 'a'.
 77 | -- Optionally, this can include a collapsed Dirichlet prior.
 78 | -- 'Multinom alpha count total' is a multinomial with Dirichlet prior
 79 | -- with symmetric parameter 'alpha', ...
 80 | data Multinom w a = DirMulti { dmAlpha :: !(Alpha a)
 81 |                              , dmCounts :: !(EnumMap a w)
 82 |                              , dmTotal :: !w
 83 |                              , dmDomain :: !(Seq a)
 84 |                              }
 85 |                   | Multinom { dmProbs :: !(EnumMap a Double)
 86 |                              , dmCounts :: !(EnumMap a w)
 87 |                              , dmTotal :: !w
 88 |                              , dmDomain :: !(Seq a)
 89 |                              }
 90 |                   deriving (Show, Eq, Generic)
 91 | instance (Enum a, Binary a, Binary w) => Binary (Multinom w a)
 92 | 
 93 | -- | 'symMultinomFromPrecision d p' is a symmetric Dirichlet/multinomial over a
 94 | -- domain 'd' with precision 'p'
 95 | symDirMultiFromPrecision :: (Num w, Enum a) => [a] -> DirPrecision -> Multinom w a
 96 | symDirMultiFromPrecision domain prec = symDirMulti (0.5*prec) domain
 97 | 
 98 | -- | 'dirMultiFromMeanPrecision m p' is an asymmetric Dirichlet/multinomial
 99 | -- over a domain 'd' with mean 'm' and precision 'p'
100 | dirMultiFromPrecision :: (Num w, Enum a) => DirMean a -> DirPrecision -> Multinom w a
101 | dirMultiFromPrecision m p = dirMultiFromAlpha $ meanPrecisionToAlpha m p
102 | 
103 | -- | Create a symmetric Dirichlet/multinomial
104 | symDirMulti :: (Num w, Enum a) => Double -> [a] -> Multinom w a
105 | symDirMulti alpha domain = dirMultiFromAlpha $ symAlpha domain alpha
106 | 
107 | -- | A multinomial without a prior
108 | multinom :: (Num w, Enum a) => [(a,Double)] -> Multinom w a
109 | multinom probs = Multinom { dmProbs = EM.fromList probs
110 |                           , dmCounts = EM.empty
111 |                           , dmTotal = 0
112 |                           , dmDomain = SQ.fromList $ map fst probs
113 |                           }
114 | 
115 | -- | Create an asymmetric Dirichlet/multinomial from items and alphas
116 | dirMulti :: (Num w, Enum a) => [(a,Double)] -> Multinom w a
117 | dirMulti domain = dirMultiFromAlpha $ asymAlpha $ EM.fromList domain
118 | 
119 | -- | Create a Dirichlet/multinomial with a given prior
120 | dirMultiFromAlpha :: (Enum a, Num w) => Alpha a -> Multinom w a
121 | dirMultiFromAlpha alpha = DirMulti { dmAlpha = alpha
122 |                                    , dmCounts = EM.empty
123 |                                    , dmTotal = 0
124 |                                    , dmDomain = alphaDomain alpha
125 |                                    }
126 | 
127 | data Acc w = Acc !w !Probability
128 | 
129 | obsProb :: (Enum a, Real w, Functor f, Foldable f)
130 |         => Multinom w a -> f (a, w) -> Probability
131 | obsProb (Multinom {dmProbs=prob}) obs =
132 |     Foldable.product $ fmap (\(k,w)->(realToFrac $ prob EM.! k)^^w) obs
133 |   where (^^) :: Real w => Log Double -> w -> Log Double
134 |         x ^^ y = Exp $ realToFrac y * ln x
135 | obsProb (DirMulti {dmAlpha=alpha}) obs =
136 |     let go (Acc w p) (k',w') = Acc (w+w') (p*p')
137 |            where p' = Exp $ checkNaN "obsProb"
138 |                       $ lnGamma (realToFrac w' + alpha `alphaOf` k')
139 |     in case Foldable.foldl' go (Acc 0 1) obs of
140 |          Acc w p -> p / alphaNormalizer alpha
141 |                     / Exp (lnGamma $ realToFrac w + sumAlpha alpha)
142 | {-# INLINE obsProb #-}
143 | 
144 | dmGetCounts :: (Enum a, Num w) => Multinom w a -> a -> w
145 | dmGetCounts dm k =
146 |   EM.findWithDefault 0 k (dmCounts dm)
147 | 
148 | instance HasLikelihood (Multinom w) where
149 |   type LContext (Multinom w) a = (Real w, Ord a, Enum a)
150 |   likelihood dm = obsProb dm $ EM.assocs $ dmCounts dm
151 |   {-# INLINEABLE likelihood #-}
152 | 
153 | instance FullConditionable (Multinom w) where
154 |   type FCContext (Multinom w) a = (Real w, Ord a, Enum a)
155 |   sampleProb (Multinom {dmProbs=prob}) k = prob EM.! k
156 |   sampleProb dm@(DirMulti {dmAlpha=a}) k =
157 |     let alpha = a `alphaOf` k
158 |         n = realToFrac $ dmGetCounts dm k
159 |         total = realToFrac $ dmTotal dm
160 |     in (n + alpha) / (total + sumAlpha a)
161 |   {-# INLINEABLE sampleProb #-}
162 | 
163 | {-# INLINEABLE probabilities #-}
164 | probabilities :: (Real w, Ord a, Enum a) => Multinom w a -> Seq (Double, a)
165 | probabilities dm = fmap (\a->(sampleProb dm a, a)) $ dmDomain dm -- FIXME
166 | 
167 | -- | Probabilities sorted decreasingly
168 | decProbabilities :: (Real w, Ord a, Enum a, Num w) => Multinom w a -> Seq (Double, a)
169 | decProbabilities = SQ.sortBy (flip (compare `on` fst)) . probabilities
170 | 
171 | prettyMultinom :: (Real w, Ord a, Enum a) => Int -> (a -> String) -> Multinom w a -> Doc
172 | prettyMultinom _ _ (Multinom {})         = error "TODO: prettyMultinom"
173 | prettyMultinom n showA dm@(DirMulti {})  =
174 |   text "DirMulti" <+> parens (text "alpha=" <> prettyAlpha showA (dmAlpha dm))
175 |   $$ nest 5 (fsep $ punctuate comma
176 |             $ map (\(p,a)->text (showA a) <> parens (text $ printf "%1.2e" p))
177 |             $ take n $ Data.Foldable.toList $ decProbabilities dm)
178 | 
179 | -- | Update the prior of a Dirichlet/multinomial
180 | updatePrior :: (Alpha a -> Alpha a) -> Multinom w a -> Multinom w a
181 | updatePrior _ (Multinom {}) = error "TODO: updatePrior"
182 | updatePrior f dm = dm {dmAlpha=f $ dmAlpha dm}
183 | 
184 | -- | Relative tolerance in precision for prior estimation
185 | estimationTol = 1e-8
186 | 
187 | reestimatePriors :: (Foldable f, Functor f, Real w, Enum a)
188 |                  => f (Multinom w a) -> f (Multinom w a)
189 | reestimatePriors dms =
190 |   let usableDms = filter (\dm->dmTotal dm > 5) $ toList dms
191 |       alpha = case () of
192 |                 _ | length usableDms <= 3 -> id
193 |                 otherwise -> const $ estimatePrior estimationTol usableDms
194 |   in fmap (updatePrior alpha) dms
195 | 
196 | reestimateSymPriors :: (Foldable f, Functor f, Real w, Enum a)
197 |                     => f (Multinom w a) -> f (Multinom w a)
198 | reestimateSymPriors dms =
199 |   let usableDms = filter (\dm->dmTotal dm > 5) $ toList dms
200 |       alpha = case () of
201 |                 _ | length usableDms <= 3 -> id
202 |                 otherwise -> const $ symmetrizeAlpha $ estimatePrior estimationTol usableDms
203 |   in fmap (updatePrior alpha) dms
204 | 
205 | -- | Estimate the prior alpha from a set of Dirichlet/multinomials
206 | estimatePrior' :: (Real w, Enum a) => [Multinom w a] -> Alpha a -> Alpha a
207 | estimatePrior' dms alpha =
208 |   let domain = toList $ dmDomain $ head dms
209 |       f k = let num = sum $ map (\i->digamma (realToFrac (dmGetCounts i k) + alphaOf alpha k)
210 |                                       - digamma (alphaOf alpha k)
211 |                                 )
212 |                       $ filter (\i->dmGetCounts i k > 0) dms
213 |                 total i = realToFrac $ sum $ map (\k->dmGetCounts i k) domain
214 |                 sumAlpha = sum $ map (alphaOf alpha) domain
215 |                 denom = sum $ map (\i->digamma (total i + sumAlpha) - digamma sumAlpha) dms
216 |             in case () of
217 |                  _ | isNaN num  -> error $ "BayesStack.DirMulti.estimatePrior': num = NaN: "++show (map (\i->(digamma (realToFrac (dmGetCounts i k) + alphaOf alpha k), digamma (alphaOf alpha k))) dms)
218 |                  _ | denom == 0 -> error "BayesStack.DirMulti.estimatePrior': denom=0"
219 |                  _ | isInfinite num -> error "BayesStack.DirMulti.estimatePrior': num is infinity "
220 |                  _ | isNaN (alphaOf alpha k * num / denom) -> error $ "NaN"++show (num, denom)
221 |                  otherwise  -> alphaOf alpha k * num / denom
222 |   in asymAlpha $ foldMap (\k->EM.singleton k (f k)) domain
223 | 
224 | estimatePrior :: (Real w, Enum a) => Double -> [Multinom w a] -> Alpha a
225 | estimatePrior tol dms = iter $ dmAlpha $ head dms
226 |   where iter alpha = let alpha' = estimatePrior' dms alpha
227 |                          (_, prec)  = alphaToMeanPrecision alpha
228 |                          (_, prec') = alphaToMeanPrecision alpha'
229 |                      in if abs ((prec' - prec) / prec) > tol
230 |                            then iter alpha'
231 |                            else alpha'
232 | 


--------------------------------------------------------------------------------
/BayesStack/Dirichlet.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DeriveGeneric #-}
  2 | 
  3 | module BayesStack.Dirichlet ( -- * Dirichlet parameter
  4 |                               Alpha
  5 |                             , symAlpha, asymAlpha
  6 |                             , alphaDomain, alphaNormalizer, sumAlpha
  7 |                             , DirMean, DirPrecision
  8 |                             , alphaOf, setAlphaOf, setSymAlpha
  9 |                             , alphaToMeanPrecision, meanPrecisionToAlpha
 10 |                             , symmetrizeAlpha
 11 |                             , prettyAlpha
 12 |                             ) where
 13 | 
 14 | import Data.Foldable (toList, Foldable, fold)
 15 | 
 16 | import Data.EnumMap (EnumMap)
 17 | import qualified Data.EnumMap as EM
 18 | 
 19 | import Data.Sequence (Seq)
 20 | import qualified Data.Sequence as SQ
 21 | 
 22 | import Numeric.Log
 23 | import Math.Gamma
 24 | 
 25 | import Text.Printf
 26 | import Text.PrettyPrint
 27 | 
 28 | import Data.Binary
 29 | import Data.Binary.EnumMap ()
 30 | import GHC.Generics (Generic)
 31 | 
 32 | -- | Make error handling a bit easier
 33 | checkNaN :: RealFloat a => String -> a -> a
 34 | checkNaN loc x | isNaN x = error $ "BayesStack.Dirichlet."++loc++": Not a number"
 35 | checkNaN loc x | isInfinite x = error $ "BayesStack.Dirichlet."++loc++": Infinity"
 36 | checkNaN _ x = x
 37 | 
 38 | -- | A Dirichlet prior
 39 | data Alpha a = SymAlpha { aDomain :: Seq a
 40 |                         , aAlpha :: !Double
 41 |                         , aNorm :: Log Double
 42 |                         }
 43 |              | Alpha { aAlphas :: EnumMap a Double
 44 |                      , aSumAlphas :: !Double
 45 |                      , aNorm :: Log Double
 46 |                      }
 47 |              deriving (Show, Eq, Generic)
 48 | instance (Enum a, Binary a) => Binary (Alpha a)
 49 | 
 50 | type DirMean a = EnumMap a Double
 51 | type DirPrecision = Double
 52 | 
 53 | symAlpha :: Enum a => [a] -> Double -> Alpha a
 54 | symAlpha domain _ | null domain = error "Dirichlet over null domain is undefined"
 55 | symAlpha domain alpha = SymAlpha { aDomain = SQ.fromList domain
 56 |                                  , aAlpha = alpha
 57 |                                  , aNorm = alphaNorm $ symAlpha domain alpha
 58 |                                  }
 59 | 
 60 | -- | Construct an asymmetric Alpha
 61 | asymAlpha :: Enum a => EnumMap a Double -> Alpha a
 62 | asymAlpha alphas | EM.null alphas = error "Dirichlet over null domain is undefined"
 63 | asymAlpha alphas = Alpha { aAlphas = alphas
 64 |                          , aSumAlphas = Prelude.sum $ EM.elems alphas
 65 |                          , aNorm = alphaNorm $ asymAlpha alphas
 66 |                          }
 67 | 
 68 | setSymAlpha :: Enum a => Double -> Alpha a -> Alpha a
 69 | setSymAlpha alpha a = let b = (symmetrizeAlpha a) { aAlpha = alpha
 70 |                                                   , aNorm = alphaNorm b
 71 |                                                   }
 72 |                       in b
 73 | 
 74 | -- | Compute the normalizer of the likelihood involving alphas,
 75 | -- (product_k gamma(alpha_k)) / gamma(sum_k alpha_k)
 76 | alphaNorm :: Enum a => Alpha a -> Log Double
 77 | alphaNorm alpha = normNum / normDenom
 78 |   where dim = realToFrac $ SQ.length $ aDomain alpha
 79 |         normNum = case alpha of
 80 |                       Alpha {} -> product $ map (\a->Exp $ checkNaN ("alphaNorm.normNum(asym) alpha="++show a) $ lnGamma a)
 81 |                                   $ EM.elems $ aAlphas alpha
 82 |                       SymAlpha {} -> Exp $ checkNaN "alphaNorm.normNum(sym)" $ dim * lnGamma (aAlpha alpha)
 83 |         normDenom = Exp $ checkNaN "alphaNorm.normDenom" $ lnGamma $ sumAlpha alpha
 84 | 
 85 | -- | 'alphaDomain a' is the domain of prior 'a'
 86 | alphaDomain :: Enum a => Alpha a -> Seq a
 87 | alphaDomain (SymAlpha {aDomain=d}) = d
 88 | alphaDomain (Alpha {aAlphas=a}) = SQ.fromList $ EM.keys a
 89 | {-# INLINE alphaDomain #-}
 90 | 
 91 | alphaNormalizer :: Enum a => Alpha a -> Log Double
 92 | alphaNormalizer = aNorm
 93 | {-# INLINE alphaNormalizer #-}
 94 | 
 95 | -- | 'alphaOf alpha k' is the value of element 'k' in prior 'alpha'
 96 | alphaOf :: Enum a => Alpha a -> a -> Double
 97 | alphaOf (SymAlpha {aAlpha=alpha}) = const alpha
 98 | alphaOf (Alpha {aAlphas=alphas}) = (alphas EM.!)
 99 | {-# INLINE alphaOf #-}
100 | 
101 | -- | 'sumAlpha alpha' is the sum of all alphas
102 | sumAlpha :: Enum a => Alpha a -> Double
103 | sumAlpha (SymAlpha {aDomain=domain, aAlpha=alpha}) = realToFrac (SQ.length domain) * alpha
104 | sumAlpha (Alpha {aSumAlphas=sum}) = sum
105 | {-# INLINE sumAlpha #-}
106 | 
107 | -- | Set a particular alpha element
108 | setAlphaOf :: Enum a => a -> Double -> Alpha a -> Alpha a
109 | setAlphaOf k a alpha@(SymAlpha {}) = setAlphaOf k a $ asymmetrizeAlpha alpha
110 | setAlphaOf k a (Alpha {aAlphas=alphas}) = asymAlpha $ EM.insert k a alphas
111 | {-# INLINE setAlphaOf #-}
112 | 
113 | -- | 'alphaToMeanPrecision a' is the mean/precision representation of the prior 'a'
114 | alphaToMeanPrecision :: Enum a => Alpha a -> (DirMean a, DirPrecision)
115 | alphaToMeanPrecision (SymAlpha {aDomain=dom, aAlpha=alpha}) =
116 |   let prec = realToFrac (SQ.length dom) * alpha
117 |   in (EM.fromList $ map (\a->(a, alpha/prec)) $ toList dom, prec)
118 | alphaToMeanPrecision (Alpha {aAlphas=alphas, aSumAlphas=prec}) =
119 |   (fmap (/prec) alphas, prec)
120 | {-# INLINE alphaToMeanPrecision #-}
121 | 
122 | -- | 'meanPrecisionToAlpha m p' is a prior with mean 'm' and precision 'p'
123 | meanPrecisionToAlpha :: Enum a => DirMean a -> DirPrecision -> Alpha a
124 | meanPrecisionToAlpha mean prec = asymAlpha $ fmap (*prec) mean
125 | {-# INLINE meanPrecisionToAlpha #-}
126 | 
127 | -- | Symmetrize a Dirichlet prior (such that mean=0)
128 | symmetrizeAlpha :: Enum a => Alpha a -> Alpha a
129 | symmetrizeAlpha alpha@(SymAlpha {}) = alpha
130 | symmetrizeAlpha alpha@(Alpha {}) =
131 |   SymAlpha { aDomain = alphaDomain alpha
132 |            , aAlpha = sumAlpha alpha / realToFrac (EM.size $ aAlphas alpha)
133 |            , aNorm = alphaNorm $ symmetrizeAlpha alpha
134 |            }
135 | 
136 | -- | Turn a symmetric alpha into an asymmetric alpha. For internal use.
137 | asymmetrizeAlpha :: Enum a => Alpha a -> Alpha a
138 | asymmetrizeAlpha (SymAlpha {aDomain=domain, aAlpha=alpha}) =
139 |   asymAlpha $ fold $ fmap (\k->EM.singleton k alpha) domain
140 | asymmetrizeAlpha alpha@(Alpha {}) = alpha
141 | 
142 | -- | Pretty-print a Dirichlet prior
143 | prettyAlpha :: Enum a => (a -> String) -> Alpha a -> Doc
144 | prettyAlpha showA (SymAlpha {aAlpha=alpha}) = text "Symmetric" <+> double alpha
145 | prettyAlpha showA (Alpha {aAlphas=alphas}) =
146 |   text "Assymmetric"
147 |   <+> fsep (punctuate comma
148 |            $ map (\(a,alpha)->text (showA a) <> parens (text $ printf "%1.2e" alpha))
149 |            $ take 100 $ EM.toList $ alphas)
150 | 


--------------------------------------------------------------------------------
/BayesStack/Gibbs.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE TypeFamilies, FlexibleInstances, FlexibleContexts,
 2 |              ExistentialQuantification, GADTs, CPP #-}
 3 | 
 4 | module BayesStack.Gibbs ( UpdateUnit(..)
 5 |                         , WrappedUpdateUnit(..)
 6 |                         ) where
 7 | 
 8 | import Control.DeepSeq
 9 | import Data.Random
10 | 
11 | class (Show (Setting uu), Show uu) => UpdateUnit uu where
12 |     type ModelState uu
13 |     type Setting uu
14 |     fetchSetting   :: uu -> ModelState uu -> Setting uu
15 |     evolveSetting  :: ModelState uu -> uu -> RVar (Setting uu)
16 |     updateSetting  :: uu -> Setting uu -> Setting uu -> ModelState uu -> ModelState uu
17 | 
18 | data WrappedUpdateUnit ms = forall uu. (UpdateUnit uu, ModelState uu ~ ms,
19 |                                         NFData (Setting uu), Eq (Setting uu))
20 |                          => WrappedUU uu
21 | 
22 | 


--------------------------------------------------------------------------------
/BayesStack/Gibbs/Concurrent.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE TypeFamilies, FlexibleInstances, FlexibleContexts, GADTs, CPP #-}
 2 | 
 3 | module BayesStack.Gibbs.Concurrent ( gibbsUpdate
 4 |                                    , module BayesStack.Gibbs
 5 |                                    ) where
 6 | 
 7 | import BayesStack.Gibbs
 8 | import Control.Concurrent
 9 | import Control.Concurrent.STM
10 | import Control.DeepSeq
11 | import Control.Monad (replicateM_, when, forever)
12 | import Control.Monad.State hiding (lift)
13 | import Data.IORef
14 | import Data.Random
15 | import Data.Random.Lift
16 | import Debug.Trace (traceEventIO)
17 | import GHC.Conc.Sync (labelThread)
18 | import System.Random.MWC (withSystemRandom)
19 | 
20 | updateUnit :: WrappedUpdateUnit ms -> IORef ms -> TBQueue (ms -> ms) -> RVarT IO ()
21 | updateUnit (WrappedUU unit) stateRef diffQueue = do
22 |     modelState <- lift $ readIORef stateRef
23 |     let s = fetchSetting unit modelState
24 |     s' <- lift $ evolveSetting modelState unit
25 |     (s,s') `deepseq` return ()
26 |     when (s /= s') $
27 |         lift $ atomically $ writeTBQueue diffQueue (updateSetting unit s s')
28 | 
29 | updateWorker :: TQueue (WrappedUpdateUnit ms) -> IORef ms -> TBQueue (ms -> ms) -> RVarT IO ()
30 | updateWorker unitsQueue stateRef diffQueue = do
31 |     unit <- lift $ atomically $ tryReadTQueue unitsQueue
32 |     case unit of
33 |         Just unit' -> do updateUnit unit' stateRef diffQueue
34 |                          updateWorker unitsQueue stateRef diffQueue
35 |         Nothing -> return ()
36 | 
37 | #if __GLASGOW_HASKELL__ < 706
38 | atomicModifyIORef' = atomicModifyIORef
39 | #endif
40 | 
41 | diffWorker :: IORef ms -> TBQueue (ms -> ms) -> Int -> IO ()
42 | diffWorker stateRef diffQueue updateBlock = forever $ do
43 |     s <- readIORef stateRef
44 |     s' <- execStateT (replicateM_ updateBlock $ do
45 |                           diff <- lift $ atomically $ readTBQueue diffQueue
46 |                           modify diff
47 |                      ) s
48 |     atomicWriteIORef stateRef $! s'
49 |     traceEventIO "diffWorker: State updated"
50 | 
51 | labelMyThread :: String -> IO ()
52 | labelMyThread label = myThreadId >>= \id->labelThread id label
53 | 
54 | gibbsUpdate :: Int -> Int -> ms -> [WrappedUpdateUnit ms] -> IO ms
55 | gibbsUpdate nUpdateWorkers updateBlock modelState units = do
56 |     unitsQueue <- atomically $ do q <- newTQueue
57 |                                   mapM_ (writeTQueue q) units
58 |                                   return q
59 |     diffQueue <- atomically $ newTBQueue $ 2*updateBlock -- FIXME
60 |     stateRef <- newIORef modelState
61 |     diffThread <- forkIO $ do labelMyThread "diff worker"
62 |                               diffWorker stateRef diffQueue updateBlock
63 | 
64 |     runningWorkers <- atomically $ newTVar (0 :: Int)
65 |     done <- atomically $ newEmptyTMVar :: IO (TMVar ())
66 |     replicateM_ nUpdateWorkers $ forkIO $ withSystemRandom $ \mwc->do
67 |         labelMyThread "update worker"
68 |         atomically $ modifyTVar' runningWorkers (+1)
69 |         runRVarT (updateWorker unitsQueue stateRef diffQueue) mwc
70 |         atomically $ do
71 |             modifyTVar' runningWorkers (+(-1))
72 |             running <- readTVar runningWorkers
73 |             when (running == 0) $ putTMVar done ()
74 | 
75 |     atomically $ takeTMVar done
76 |     readIORef stateRef


--------------------------------------------------------------------------------
/BayesStack/Gibbs/Simple.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE TypeFamilies, FlexibleInstances, FlexibleContexts, GADTs, CPP #-}
 2 | 
 3 | module BayesStack.Gibbs.Simple ( gibbsUpdate
 4 |                                , module BayesStack.Gibbs
 5 |                                ) where
 6 | 
 7 | import BayesStack.Gibbs
 8 | import Control.Monad.State hiding (lift)
 9 | import Control.DeepSeq       
10 | import Data.Random
11 | import Data.Random.Lift
12 | import System.Random.MWC       
13 | 
14 | updateUnit :: WrappedUpdateUnit ms -> StateT ms RVar ()
15 | updateUnit (WrappedUU unit) = do
16 |     ms <- get
17 |     let s = fetchSetting unit ms
18 |     s' <- lift $ evolveSetting ms unit
19 |     (s,s') `deepseq` return ()
20 |     put $ updateSetting unit s s' ms
21 | 
22 | gibbsUpdate :: ms -> [WrappedUpdateUnit ms] -> IO ms
23 | gibbsUpdate modelState units = withSystemRandom $ asGenIO $ \mwc->
24 |     runRVar (execStateT (mapM_ updateUnit units) modelState) mwc


--------------------------------------------------------------------------------
/BayesStack/TupleEnum.hs:
--------------------------------------------------------------------------------
1 | module BayesStack.TupleEnum where
2 | 
3 | -- This is probably a bad idea
4 | -- Perhaps some TH magic to automatically derive this would make it more tolerable
5 | instance (Enum a, Enum b) => Enum (a,b) where
6 |   fromEnum (a,b) = 2^32 * fromEnum a + fromEnum b
7 |   toEnum n = let (na, nb) = n `quotRem` (2^32)
8 |              in (toEnum na, toEnum nb)
9 | 


--------------------------------------------------------------------------------
/BayesStack/Types.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE TypeFamilies, KindSignatures, ConstraintKinds #-}
 2 | 
 3 | module BayesStack.Types ( Probability
 4 |                         , HasLikelihood(..)
 5 |                         , FullConditionable(..)
 6 |                         ) where
 7 | 
 8 | import GHC.Prim (Constraint)
 9 | import Numeric.Log
10 | 
11 | type Probability = Log Double
12 | 
13 | class HasLikelihood p where
14 |   type LContext p a :: Constraint
15 |   type LContext p a = ()
16 |   likelihood :: LContext p a => p a -> Probability
17 | 
18 | -- | A distribution for which a full conditional factor can be produced
19 | class FullConditionable p where
20 |   type FCContext p a :: Constraint
21 |   type FCContext p a = ()
22 |   sampleProb :: FCContext p a => p a -> a -> Double
23 | 


--------------------------------------------------------------------------------
/BayesStack/UniqueKey.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE GeneralizedNewtypeDeriving, CPP #-}
 2 | 
 3 | module BayesStack.UniqueKey ( getUniqueKey
 4 |                             , getValueMap, getKeyMap
 5 |                             , mapTraversable
 6 |                             , UniqueKey, UniqueKeyT
 7 |                             , runUniqueKey, runUniqueKeyT
 8 |                             , runUniqueKey', runUniqueKeyT'
 9 |                             ) where
10 | 
11 | import Prelude hiding (mapM)
12 | import Control.Applicative (Applicative, (<$>))
13 | import Data.Traversable (Traversable, mapM)
14 | import Data.Tuple
15 | import Data.Functor.Identity
16 | 
17 | import Control.Monad.Trans
18 | import Control.Monad.State.Strict hiding (mapM)
19 | 
20 | #if __GLASGOW_HASKELL__ >= 706
21 | import Data.Map.Strict (Map)
22 | import qualified Data.Map.Strict as M
23 | #else
24 | import Data.Map (Map)
25 | import qualified Data.Map as M
26 | #endif
27 | 
28 | -- | 'UniqueKey val key' is a monad for a calculation of a mapping unique keys
29 | -- 'key' onto values 'val'
30 | type UniqueKey val key = UniqueKeyT val key Identity
31 | newtype UniqueKeyT val key m a = UniqueKeyT (StateT ([key], Map val key) m a)
32 |                               deriving (Monad, Applicative, Functor, MonadTrans)
33 | 
34 | -- | Get map of unique keys to values
35 | getKeyMap :: (Monad m, Applicative m, Ord key, Ord val) => UniqueKeyT val key m (Map key val)
36 | getKeyMap = M.fromList . map swap . M.toList <$> getValueMap
37 | 
38 | -- | Get map of values to unique keys
39 | getValueMap :: (Monad m, Applicative m, Ord key, Ord val) => UniqueKeyT val key m (Map val key)
40 | getValueMap = snd <$> UniqueKeyT get
41 | 
42 | popUniqueKey :: Monad m => UniqueKeyT val key m key
43 | popUniqueKey = do
44 |     (keys, a) <- UniqueKeyT get
45 |     case keys of
46 |         key:rest -> UniqueKeyT (put $! (rest, a)) >> return key
47 |         []      -> error "Ran out of unique keys"
48 | 
49 | -- | Find the unique key for value 'val' or 'Nothing' if the value is unknown
50 | findUniqueKey :: (Monad m, Applicative m, Ord key, Ord val) => val -> UniqueKeyT val key m (Maybe key)
51 | findUniqueKey value = M.lookup value <$> getValueMap
52 | 
53 | getUniqueKey :: (Monad m, Applicative m, Ord key, Ord val) => val -> UniqueKeyT val key m key
54 | getUniqueKey x = do
55 |     key <- findUniqueKey x
56 |     case key of
57 |         Just k  -> return k
58 |         Nothing -> do k <- popUniqueKey
59 |                       UniqueKeyT $ modify $ \(keys, keyMap)->(keys, M.insert x k keyMap)
60 |                       return k
61 | 
62 | runUniqueKey :: (Ord key) => [key] -> UniqueKey val key a -> a
63 | runUniqueKey keys = runIdentity . runUniqueKeyT keys
64 | 
65 | runUniqueKeyT :: (Monad m, Ord key) => [key] -> UniqueKeyT val key m a -> m a
66 | runUniqueKeyT keys (UniqueKeyT a) = evalStateT a (keys, M.empty)
67 | 
68 | -- | Run a `UniqueKeyT`, returning the result and the associated key map
69 | runUniqueKeyT' :: (Monad m, Applicative m, Ord key, Ord val) => [key] -> UniqueKeyT val key m a -> m (a, Map key val)
70 | runUniqueKeyT' keys action =
71 |     runUniqueKeyT keys $ do result <- action
72 |                             keyMap <- getKeyMap
73 |                             return (result, keyMap)
74 | 
75 | -- | Run a `UniqueKey`, returning the result and the associated key map
76 | runUniqueKey' :: (Ord key, Ord val) => [key] -> UniqueKey val key a -> (a, Map key val)
77 | runUniqueKey' keys action =
78 |     runUniqueKey keys $ do result <- action
79 |                            keyMap <- getKeyMap
80 |                            return (result, keyMap)
81 | 
82 | mapTraversable :: (Traversable t, Ord key, Ord val) => [key] -> t val -> (t key, Map key val)
83 | mapTraversable keys xs = runUniqueKey' keys $ mapM getUniqueKey xs
84 | 


--------------------------------------------------------------------------------
/Data/Binary/EnumMap.hs:
--------------------------------------------------------------------------------
 1 | module Data.Binary.EnumMap where
 2 | 
 3 | import Data.Binary
 4 | import Data.EnumMap
 5 | 
 6 | instance (Enum k, Binary k, Binary v) => Binary (EnumMap k v) where
 7 |   get = do a <- get
 8 |            return $ fromList a
 9 |   put = put . toList
10 | 


--------------------------------------------------------------------------------
/Data/Random/Sequence.hs:
--------------------------------------------------------------------------------
 1 | module Data.Random.Sequence (randomElementT) where
 2 | 
 3 | import Data.Random
 4 | import Data.Sequence as SQ
 5 | 
 6 | randomElementT :: Seq a -> RVarT m a
 7 | randomElementT xs | SQ.null xs = error "randomElementT: empty seq!"
 8 | randomElementT xs = do
 9 |   n <- uniformT 0 (SQ.length xs - 1)
10 |   return (xs `index` n)
11 | 


--------------------------------------------------------------------------------
/Data/Sequence/Chunk.hs:
--------------------------------------------------------------------------------
 1 | module Data.Sequence.Chunk (chunk) where
 2 | 
 3 | import Data.Sequence as SQ
 4 | 
 5 | -- | 'chunk n xs' splits 'xs' into 'n' chunks
 6 | chunk :: Int -> Seq a -> Seq (Seq a)
 7 | chunk n xs = let m = ceiling $ realToFrac (SQ.length xs) / realToFrac n
 8 |                  f xs | SQ.null xs = SQ.empty
 9 |                  f xs = SQ.take m xs <| (f $ SQ.drop m xs)
10 |              in f xs
11 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c)2012, Ben Gamari, Laura Dietz
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 |     * Redistributions of source code must retain the above copyright
 9 |       notice, this list of conditions and the following disclaimer.
10 | 
11 |     * Redistributions in binary form must reproduce the above
12 |       copyright notice, this list of conditions and the following
13 |       disclaimer in the documentation and/or other materials provided
14 |       with the distribution.
15 | 
16 |     * Neither the name of Ben Gamari nor the names of other
17 |       contributors may be used to endorse or promote products derived
18 |       from this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | 


--------------------------------------------------------------------------------
/README.mkd:
--------------------------------------------------------------------------------
 1 | # bayes-stack: Parallel MCMC inference on graphical models
 2 | 
 3 | `Bayes-stack` is a framework for parallel probabilistic inference on
 4 | graphical models.  The framework provides infrastructure for easily
 5 | implementing MCMC/Gibbs sampling methods capable of scaling to dozens
 6 | of cores.
 7 | 
 8 | Along with the framework itself, several models using blocked Gibbs
 9 | sampling are provided in `network-topic-models/`. See documentation: [blob/stable/doc/usage.markdown](http://github.com/bgamari/bayes-stack/blob/stable/doc/usage.markdown)
10 | 


--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 | 


--------------------------------------------------------------------------------
/SharedTasteTest.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE TupleSections #-}
  2 | 
  3 | import BayesStack.Core
  4 | import BayesStack.Models.Topic.SharedTaste
  5 | 
  6 | import qualified Data.Map as M
  7 | import qualified Data.Sequence as SQ
  8 | import qualified Data.Set as S
  9 | import qualified Data.EnumMap as EM
 10 | import Data.Foldable (forM_)
 11 | import Data.List
 12 | import Data.Function
 13 | import Data.Tuple (swap)
 14 | import Text.Printf
 15 | import Text.PrettyPrint
 16 | import Data.Random
 17 | import System.Random.MWC
 18 | import Control.Monad.IO.Class
 19 | import Data.Number.LogFloat hiding (realToFrac)
 20 | 
 21 | -- | Shared taste/LDA interpolation parameter
 22 | alphaGammaShared = 0.9
 23 | 
 24 | -- | Number of topics to train
 25 | topics = S.fromList $ [Topic i | i <- [1..3]]
 26 | 
 27 | -- | Number of Gibbs sweeps
 28 | nIter = 1000
 29 | 
 30 | vocabulary :: EM.EnumMap Item String
 31 | vocabulary = EM.fromList $ zip (map Item [1..])
 32 |              $ [ "cats"
 33 |                , "crafts"
 34 |                , "birds"
 35 |                , "dogs"
 36 |                , "christmas"
 37 |                , "Scotland"
 38 |                , "flowers"
 39 |                , "survival"
 40 |                , "dragons"
 41 |                , "vampires"
 42 |                , "angels"
 43 |                , "animals"
 44 |                , "autism"
 45 |                , "thriller"
 46 |                , "historical"
 47 |                , "horror"
 48 |                , "non-fiction"
 49 |                , "fantasy"
 50 |                , "mystery"
 51 |                ]
 52 | 
 53 | revVocabulary :: M.Map String Item
 54 | revVocabulary = M.fromList $ map swap $ EM.assocs vocabulary
 55 | 
 56 | -- | Input data to the model
 57 | stdata = STData { -- Content-enriched network
 58 |                   stNodes            = S.fromList $ map Node [1..5]
 59 |                 , stFriendships      = S.fromList $ map Friendship $
 60 |                                        [ (Node 1, Node 2)
 61 |                                        , (Node 2, Node 4)
 62 |                                        , (Node 2, Node 3)
 63 |                                        , (Node 4, Node 5)
 64 |                                        , (Node 5, Node 1)
 65 |                                        ]
 66 |                 , stItems            = S.fromList $ EM.keys vocabulary
 67 |                 , stNodeItems        = setupNodeItems
 68 |                                        $ concatMap (\(n,items)->map (\i->(n, revVocabulary M.! i)) items)
 69 |                                        $ [ (Node 1, [ "cats", "crafts", "birds"
 70 |                                                     , "christmas", "flowers"
 71 |                                                     ]
 72 |                                            )
 73 |                                          , (Node 2, [ "cats", "birds", "animals"
 74 |                                                     , "angels", "dragons", "horror"
 75 |                                                     , "vampires"
 76 |                                                     ]
 77 |                                            )
 78 |                                          , (Node 3, [ "autism", "survival", "dragons"
 79 |                                                     , "vampires", "angels", "animals"
 80 |                                                     , "survival"
 81 |                                                     ]
 82 |                                            )
 83 |                                          , (Node 4, [ "thriller", "historical", "horror"
 84 |                                                     , "non-fiction", "vampires", "fantasy"
 85 |                                                     ]
 86 |                                            )
 87 |                                          , (Node 5, [ "vampires", "dragons", "angels"
 88 |                                                     , "horror", "fantasy", "thriller"
 89 |                                                     ]
 90 |                                            )
 91 |                                          ]
 92 | 
 93 |                 -- Hyper-parameters and such
 94 |                 , stAlphaGammaShared = alphaGammaShared
 95 |                 , stAlphaGammaOwn    = 1 - alphaGammaShared
 96 |                 , stAlphaPsi         = 0.1
 97 |                 , stAlphaLambda      = 0.1
 98 |                 , stAlphaPhi         = 0.1
 99 |                 , stAlphaOmega       = 0.1
100 |                 , stTopics           = topics
101 |                 }
102 | 
103 | main :: IO ()
104 | main = do
105 |   state <- withSystemRandom $ runModel run
106 |   let maybeInc (Just n) = Just $ n+1
107 |       maybeInc Nothing  = Just 1
108 |       wordCounts = foldl' (\a (n,x)->EM.alter maybeInc x a) EM.empty
109 |                    $ EM.elems $ stNodeItems $ msData state
110 |       totalCounts = EM.size $ stNodeItems $ msData state
111 | 
112 |   liftIO $ putStr "\nTopics:\n"
113 |   forM_ topics $ \t -> do
114 |     let phi = msPhis state EM.! t
115 |         probs = map (sampleProb phi) $ S.toList $ stItems $ msData state
116 |     liftIO $ print $ text (show t) <+> colon
117 |         <+> hsep ( punctuate comma
118 |                  $ map (\(x,p)->text (vocabulary EM.! x) <> parens (text $ printf "%1.2e" p))
119 |                  $ take 10 $ sortBy (flip (compare `on` snd))
120 |                  $ zip (S.toList $ stItems $ msData state) probs
121 |                  )
122 | 
123 |   liftIO $ putStr "\nFriendship weights:\n"
124 |   forM_ (stFriendships stdata) $ \(Friendship (a,b)) -> do
125 |     let psi = msPsis state EM.! a
126 |     liftIO $ putStr $ printf "%s\t%s\t\t%e\t%e\n"
127 |              (show a) (show b)
128 |              (friendInfluence state a b)
129 |              (sampleProb psi b)
130 | 
131 |   liftIO $ putStr "\nShared topic mixtures:\n"
132 |   forM_ (stFriendships stdata) $ \fs@(Friendship (a,b)) -> do
133 |     let lambda = msLambdas state EM.! fs
134 |     liftIO $ putStr $ printf "%s\t%s\t\t" (show a) (show b)
135 |     liftIO $ putStr $ intercalate "\t"
136 |                     $ map (\t->printf "%s(%e)\t" (show t) (sampleProb lambda t))
137 |                     $ S.toList topics
138 |     liftIO $ putStr "\n"
139 | 
140 |   liftIO $ putStr "\nOwn topic mixtures:\n"
141 |   forM_ (stNodes stdata) $ \n -> do
142 |     let omega = msOmegas state EM.! n
143 |     liftIO $ putStr $ printf "%s\t\t" (show n)
144 |     liftIO $ putStr $ intercalate "\t"
145 |                     $ map (\t->printf "%s(%e)\t" (show t) (sampleProb omega t))
146 |                     $ S.toList topics
147 |     liftIO $ putStr "\n"
148 | 
149 | run :: ModelMonad STModelState
150 | run = do
151 |   initial <- liftRVar $ randomInitialize stdata
152 |   (ius, model) <- model stdata initial
153 | 
154 |   state <- getModelState model
155 |   liftIO $ putStrLn $ printf "Model log likelihood after initialization: %e"
156 |                       (logFromLogFloat $ modelLikelihood state :: Double)
157 | 
158 |   liftIO $ putStrLn $ printf "Created %d update units" (SQ.length ius)
159 |   liftIO $ putStrLn "Starting inference..."
160 |   forM_ [1..nIter::Int] $ \sweepN -> do
161 |     concurrentFullGibbsUpdate 10 ius
162 | 
163 |   state <- getModelState model
164 |   liftIO $ putStrLn $ printf "Model log likelihood after %d iterations: %e"
165 |                       nIter (logFromLogFloat $ modelLikelihood state :: Double)
166 |   return state
167 | 


--------------------------------------------------------------------------------
/bayes-stack.cabal:
--------------------------------------------------------------------------------
 1 | Name:                bayes-stack
 2 | 
 3 | Version:             0.2.0.1
 4 | Synopsis:            Framework for inferring generative probabilistic models
 5 |                      with Gibbs sampling
 6 | Description:         bayes-stack is a framework for inference on generative
 7 |                      probabilistic models. The framework uses Gibbs sampling,
 8 |                      although is suitable for other iterative update methods.
 9 | homepage:            https://github.com/bgamari/bayes-stack
10 | License:             BSD3
11 | License-file:        LICENSE
12 | Author:              Ben Gamari
13 | Maintainer:          bgamari.foss@gmail.com
14 | copyright:           Copyright (c) 2012 Ben Gamari
15 | Category:            Math
16 | 
17 | Build-type:          Simple
18 | Cabal-version:       >=1.6
19 | 
20 | source-repository head
21 |   type:                 git
22 |   location:             https://github.com/bgamari/bayes-stack.git
23 | 
24 | Library
25 |   Exposed-modules:     BayesStack.Types,
26 |                        BayesStack.Gibbs, BayesStack.Gibbs.Concurrent,
27 |                        BayesStack.Gibbs.Simple,
28 |                        BayesStack.DirMulti, BayesStack.Dirichlet,
29 |                        BayesStack.UniqueKey,
30 |                        BayesStack.TupleEnum,
31 |                        Data.Binary.EnumMap,
32 |                        Data.Random.Sequence, Data.Sequence.Chunk
33 | 
34 |   Build-depends:       base >=4 && <5,
35 |                        stm,
36 |                        transformers,
37 |                        mtl,
38 |                        deepseq,
39 |                        random-source,
40 |                        random-fu,
41 |                        rvar,
42 |                        containers,
43 |                        enummapset,
44 |                        ghc-prim,
45 |                        vector,
46 |                        mwc-random,
47 |                        pretty,
48 |                        binary >= 0.7 && < 0.8,
49 |                        log-domain >= 0.6,
50 |                        digamma,
51 |                        gamma,
52 |                        statistics
53 | 


--------------------------------------------------------------------------------
/doc/installation.markdown:
--------------------------------------------------------------------------------
 1 | 
 2 | Bayes-Stack and Network Topic Models
 3 | =====================================
 4 | 
 5 | Installing Haskell 7.6.1 
 6 | -------------------------
 7 | 
 8 | 
 9 | The model implementation is in `BayesStack/Models/Topic/SharedTaste.hs`
10 | 
11 | For an example on how to use is, see `SharedTasteTest.hs`
12 | 
13 | 1. Install Haskell Platform 7.4.1 from [http://hackage.haskell.org/platform/](http://hackage.haskell.org/platform/)
14 | 2. Ensure that package libgmp3c2 is available, for instance with `sudo apt-get install libgmp3c2`
15 | 2. Install Haskell GHC 7.6.1 from [http://www.haskell.org/ghc/download_ghc_7_6_1](http://www.haskell.org/ghc/download_ghc_7_6_1)
16 |   - Ignore the large "STOP" note on the website. Simply installing Haskell Platform is not sufficient since Bayes-Stack relies on library features that are currently not available in the GHC version provided by Haskell Platform. Scroll down to "Binary Packages" and download the tarball for your platform.
17 |   - Unzip tarball and cd into the directory
18 |   - If you have root access, install GHC with `./configure; sudo make install`
19 |   - If you do not have root access, you can install GHC to directory of your choice (let's call it `<installdir>`) with `./configure --prefix=<installdir>; make install`  then add `<installdir>/bin` to your `$PATH`
20 |   - check that you have the right version with `ghc -V` respond with version 7.6.1 - *not* 7.4.1!
21 |   - Update the Haskell package list with `cabal update`
22 | 
23 | 
24 | 
25 | Installing Bayes-Stack
26 | ----------------------------
27 | 
28 | - Clone from github by calling `git clone git://github.com/bgamari/bayes-stack.git`
29 | - `cd` into the bayes-stack source directory
30 | - Call `cabal install ./ network-topic-models/`
31 | - It should finish with "Installing executable(s) in ~/.cabal/bin"
32 | - ensure that `~/.cabal/bin/` is permanently your `$PATH` variable
33 | 
34 | 
35 | Testing the Installation
36 | ------------------------
37 | 
38 | - Run `bayes-stack-lda`
39 | - If you see the usage information, you are all set.
40 | 
41 | 
42 | Continue to read about [how to use bayes-stack topic models..](usage.html)
43 | 
44 | 
45 | Bug Reports
46 | ------------
47 | If you come across any issues/bugs/trouble, please submit a bug report to our issue tracker:
48 | [https://github.com/bgamari/bayes-stack/issues/new](https://github.com/bgamari/bayes-stack/issues/new)
49 | 
50 | 


--------------------------------------------------------------------------------
/doc/usage.markdown:
--------------------------------------------------------------------------------
  1 | Using Bayes-Stack Network Topic Models
  2 | =======================================
  3 | 
  4 | Bayes-stack comes with implementations of a various topic models for social network analysis:
  5 | - Shared Taste Model, to analyze topics shared by friends. [Dietz2012]
  6 | - Citation Influence Model, to analyze topics for which a paper is cited. [Dietz2007]
  7 | - Latent Dirichlet allocation, a baseline topic model that only incorporates text, but ignores the network structure. [Blei2003] 
  8 | 
  9 | [Dietz2012]: http://people.cs.umass.edu/~dietz/delayer/dietz-cameraready.pdf "Laura Dietz, Ben Gamari, John Guiver, Edward Snelson, Ralf Herbrich. De-Layering Social Networks by Shared Tastes of Friendships. ICWSM 2012."
 10 | [Dietz2007]: http://www.machinelearning.org/proceedings/icml2007/papers/257.pdf "Laura Dietz, Steffen Bickel, Tobias Scheffer. "Unsupervised Prediction of Citation Influences. ICML 2007."
 11 | [Blei2003]: http://www.cs.princeton.edu/~blei/papers/BleiNgJordan2003.pdf "David Blei, Andrew Ng, Michael Jordan. Latent Dirichlet Allocation. JMLR 2003."
 12 | 
 13 | Probabistic topic models and its seminal basic variant latent Dirichlet allocation [Blei2003] are unsupervised methods for clustering words into so-called topics, taking their context into account. The original work was motivated from a perspective of documents; here we assume that each node as a set of words associated. We therefore use the term `node` instead of `document`. Striving towards applications on tags, songs, books, we also use the `item` instead of `word`. 
 14 | 
 15 | 
 16 | ### Gibbs sampling in Bayes-Stack
 17 | 
 18 | All models are implemented using the bayes-stack framework, which makes it very easy to implement multi-threaded blocked collapsed Gibbs samplers for latent variable and parameter estimation. Gibbs sampling starts with a random initialization of unknown variables, which are iteratively updated in sweeps over all variables for a number of iterations. Every `lag` iterations, bayes-stack will dump the model likelihood to `sweeps/likelihood.log` - a diagnostic measure which should increase on average. Bayes-stack will dump a state of latent variables to the `sweeps` directory whenever a new "best" likelihood is achieved. As early iterations are dominated by the random initialization, the first `burnin` iterations won't lead to a dump. Bayes-stack provides support for updating sets of variables from their joint distribution (aka blocked Gibbs sampler) via the concept of an update unit.
 19 | 
 20 | ### Parallel inference in Bayes-Stack
 21 | 
 22 | Bayes-stack aims at multi-core environments, parallelizing the inference across multiple threads. In bayes-stack each worker thread will repeatedly pick an update unit, fetch the current model state, and compute a new setting for variables in the update unit. It then prepares instructions for updating the model state with this new setting (called a `diff`). One global diff-worker will apply the diffs in batches of size `diff-batch`. Notice that the model state may have advanced in the mean time, but only for a fraction of an iteration. We leverage that Gibbs samplers are robust towards mildy out-of-date state. Bayes-stack ensures the consistency of count statistics.
 23 | 
 24 | Bayes-stack is different to other parallel topic model frameworks in that *it does not* update disjoint sets of variables in isolation for several iterations. It further supports integrating out parameters with conjugative priors (collapsing). Bayes-stack is applicable to any generative model (not only LDA), especially if strong interdependencies between variables and plates exist. Bayes-stack supports arbitrary nesting of plates and conditional draws (aka gates).
 25 | 
 26 | 
 27 | [gates]:http://research.microsoft.com/apps/pubs/default.aspx?id=78857 "Gates"
 28 | 
 29 | ### Hyper-parameter optimization
 30 | 
 31 | Bayes-stack supports optimizing symmetric Dirichlet hyper-parameters (`alpha` in LDA) using Tom Minka's fixed point method. If enabled, an optimization phase is inserted every `hyper-lag` iterations (after `hyper-burnin` iterations). Diagnostic information about new settins of hyper-parameters as well as model likelihood before and after the change are written to `sweeps/hyperparams.log`. It is highly recommended to inspect this diagnostic information before proceeding.
 32 | 
 33 | 
 34 | ### Model inference and analysis
 35 | 
 36 | For each bayes-stack model, two binaries are provided:
 37 | 
 38 | bayes-stack-&lt;model&gt;
 39 | : program to run the Gibbs sampler and dump settings to the sweeps directory
 40 | 
 41 | bayes-stack-dump-&lt;model&gt; &lt;parameter&gt;
 42 | : program to run after the Gibbs sampler finished to analyse the sweeps directory and compute point-estimates for parameters (e.g. `thetas` and `phis`) or posterior inferences (e.g. `influences`) and output as csv files. 
 43 | 
 44 | 
 45 | Network Topic models
 46 | --------------------
 47 | 
 48 | ### Latent Dirichlet allocation
 49 | 
 50 | The intuition of topic model is that two tokens are likely about the same `topic` if they represent the same word, and/or are in the same node. The model does not consider edge structure among the nodes.
 51 | 
 52 | Nomenclature:
 53 | 
 54 | *thetas*
 55 | : for all nodes, the mixture of topics (e.g. node 1 is one third about topic 1, two thirds about topic 5)
 56 | 
 57 | *phis*
 58 | : for all topics, the mixture of items. (e.g. topic 1 has item "soccer" with 0.1 and item "ball" with 0.05)
 59 | 
 60 | Each of the mixtures are represented by a multinomial distribution with a symmetric Dirichlet prior.
 61 | 
 62 | To run an LDA topic model with 10 topics, priors for theta and phi of 0.1, using 5 parallel threads call,
 63 | 
 64 |     bayes-stack-lda --nodes FILE -t10 --prior-theta=0.1 --prior-phi=0.1 --sweeps=ldasweeps --threads=5
 65 | 
 66 | The nodes file must be in the format of,
 67 | 
 68 |      `node id` \t all items (e.g. words) on one line \n
 69 | 
 70 | Words (white-space separated) listed in the stopwords file (if given) are ignored from the nodes file.
 71 | 
 72 | 
 73 | After the Gibbs sampler has finished, inspect `likelihood.log` file (in the sweeps directory) to confirm that the model likelihood converged. If hyperparameter estimation is enabled, also inspect `hyperparams.log` to ensure that parameters are in a reasonable range.
 74 | 
 75 | To output the top 20 items for each topic call
 76 | 
 77 |     bayes-stack-dump-lda phis -n20 --sweeps ldasweeps
 78 | 
 79 | To output the topic mixtures for each node/document call
 80 | 
 81 |     bayes-stack-dump-lda thetas --sweeps ldasweeps
 82 | 
 83 | The output format for `N` multinomial parameters over support `S1`, `S2`, ... is
 84 | 
 85 | <table border="yes">
 86 | <tr><td>N</td><td>&nbsp;</td><td>&nbsp;</td></tr>
 87 | <tr><td>&nbsp;</td><td>S1</td><td>probability</td></tr>
 88 | <tr><td>&nbsp;</td><td>S2</td><td>probability</td></tr>
 89 | <tr><td>&nbsp;</td><td>...</td><td>&nbsp;</td></tr>
 90 | </table>
 91 | 
 92 | ### Shared Taste Model
 93 | 
 94 | The shared taste model is a probabilistic network topic model to understand shared topics that underlie a friendship. The intuition is that shared taste is indicated when two friends are using the same items. If the friends use different items that also have been mutually used in other friendships, it is also likely that they represent the shared taste. This is modeled by introducing topic mixtures of friendships (i.e. edges, not nodes!). Each node associates their items with one of their friends, then draws a topic from the shared topic mixture of that friendship to generate the item. In order to be robust against nodes with individual (that is, non-shared) interests, an item can also be associated with its node's own topic mixture.
 95 | 
 96 | Nomenclature:
 97 | 
 98 | *lamdas*
 99 | : for all edges, the mixture of topics (equivalent to theta in LDA, but shared by two nodes).
100 | 
101 | *phis*
102 | : for all topics,  mixture of items (as in LDA).
103 | 
104 | *psis*
105 | : one global mixture over all edges (for convenience, it is projected onto the set of friends for each user).
106 | 
107 | *omegas*
108 | : for each node, its own mixture of topics.
109 | 
110 | *gammas*
111 | : for each node, a Bernoulli distribution sharing versus own topics. The current version does not support estimating gammas from data. The parameter is fixed and set to the mean of its given Beta prior.
112 | 
113 | 
114 | To run the shared taste model with 10 topics using 5 parallel threads call,
115 | 
116 |      bayes-stack-st --edges FILE --nodes FILE -t10 --sweeps=stsweeps --threads=5 
117 | 
118 | To set Dirichlet priors for all mixture distribution use command line arguments such as `--prior-lambda=0.1`. 
119 | 
120 | 
121 | Two kinds of input data are required. The nodes file has to be in the format,
122 | 
123 |      `node id` \t all items (e.g. words) on one line \n
124 | 
125 | If a stopwords file is given, those items are ignored from the input.
126 | 
127 | The edges file has to list each edge as the two nodes it connects. All edges are undirected, so giving one direction is sufficient, duplicates are ignored. Follow the format:
128 | 
129 |      `node id` \t `node id` \n
130 | 
131 | 
132 | After the Gibbs sampler finished, inspect `likelihood.log` file (in the sweeps directory) to confirm that the model likelihood converged. If hyperparameter estimation is enabled, also inspect `hyperparams.log` to ensure that parameters are in a reasonable range.
133 | 
134 | To output any of the multinomial or Bernoulli parameters listed above (say lambdas) call,
135 | 
136 |      bayes-stack-dump-st lambdas --sweeps stsweeps
137 | 
138 | For multinomial distributions with many dimentions (such as phis) it is advisable to restrict the range, e.g. "-n20".
139 | 
140 | To identify the influencial friends for a particular user call,
141 | 
142 |      bayes-stack-dump-st influences --sweeps stsweeps
143 | 
144 | Notice that `influences` is different from `psis`: If two or more friends share the same taste user, they will have a high `influence`. But in `psis` those friends have to compete for items. We recommend to use `influences` for any social network analysis.
145 | 
146 | 
147 | 
148 | ### Citation influence model
149 | 
150 | The citation influence model is designed to analyze for which topics a document was cited and the strength of its influence on citing papers. Notice that even seminal papers may be cited by papers on which they only have a marginal influence. We treat each document as a node, and each citation as an arc from the citing to the cited paper.
151 | 
152 | The citation influence model captures the topics of a node emphasizing for this topics it is cited (followed, subscribed to, ...). It therefore models shared topics of a cited node and all its citing nodes. (This in unlike the shared taste model, which models sharing across one edge only.)
153 | 
154 | The graph is converted into a bipartite graph of casting each node as a cited node and a separate citing node. Items in citing nodes are explained by topic mixtures of its citations, together with its own topic mixture which captures innovation.
155 | 
156 | Those local influences on a citing node are modeled by mixture over citations `psi`. Each cited node has a mixture over topics `lambda`. Each item in a citing node are associated with one of its citations, and a topic drawn from that cited documents's `lambda`. Further, each item in a cited node are drawn from its `lambda`. The consequence is that a cited node's topic mixture `lambda` is a shared topic mixture, estimated not only from the node's items, but also some items in citing nodes. This is a crucial distinction to LDA. `psi` and `lambda` influence each other: The more likely items in a citing document fit to a topic mixture, the higher its probability  under `psi`; The more items in citing documents are associated with the cited document, the more `lamba` will be representing it.
157 | 
158 | 
159 | Each node in the citation graph will be represented once as a cited document, and once as a citing document. Topics in both duplicates are synchronized via joint influence of `phi`. As some research papers include more novel ideas than others, each citing document also has a topic mixture `omega` for own topics. The propensity to re-use topics from citations versus the introduction of new topics is captured in the Bernoulli parameter `gamma`.
160 | 
161 | Nomenclature:
162 | 
163 | *lambdas*
164 | : for each cited node, the mixture of topics (equivalent to theta in LDA, but modeling shared across the cited node and its citations).
165 | 
166 | *phis*
167 | : for each topic, the mixture of items (as in LDA).
168 | 
169 | *psis*
170 | : for each citing node, the mixture over its cited nodes.
171 | 
172 | *omegas*
173 | : for each citing node, the own mixture of topics.
174 | 
175 | *gammas*
176 | : for each citing node, a Bernoulli distribution for sharing versus own topics. The current version does not support estimating gammas from data. The parameter is fixed and set to the mean of its given Beta prior.
177 | 
178 | 
179 | To run the shared taste model with 10 topics using 5 parallel threads call,
180 | 
181 |      bayes-stack-ci --arcs FILE --nodes FILE -t10 --sweeps=cisweeps --threads=5 
182 | 
183 | To set Dirichlet priors for all mixture distribution use command line arguments such as `--prior-lambda=0.1`. 
184 | 
185 | Two kinds of input data are required. The nodes file has to be in the format,
186 | 
187 |      `node id` \t all items (e.g. words) on one line \n
188 | 
189 | This id is used both as a cited node id as well as a citing node id. If a stopwords file is given, those items are ignored from the input.
190 | 
191 | The arcs file has to list each arc as the source node and sink node. All arcs are directed. You *can* add cycles. Follow the format,
192 | 
193 |      `citing node id` \t `cited node id` \n
194 | 
195 | 
196 | After the Gibbs sampler finished, inspect `likelihood.log` file (in the sweeps directory) to confirm that the model likelihood converged. If hyperparameter estimation is turned on, also inspect `hyperparams.log` to ensure that parameters are in a reasonable range.
197 | 
198 | To output any of the multinomial or Bernoulli parameters listed above (say lambdas) call,
199 | 
200 |      bayes-stack-dump-ci lambdas --sweeps cisweeps
201 | 
202 | For multinomial distributions with many dimentions (such as `phis`) it is advisable to restrict the range, e.g. "-n20".
203 | 
204 | To identify the influencial cited nodes for a particular citing node call,
205 | 
206 |      bayes-stack-dump-ci influences --sweeps cisweeps
207 | 
208 | Notice that `influences` is different from `psis`. Any citation that shares a frequent topic with the node will have a high `influence`. But for generating items, those citations would have to compete with each other, which is reflected in `psis`. We recommend to use `influences` for any social network analysis.
209 | 
210 | 
211 | 
212 | 
213 | 
214 | 
215 | 
216 | 
217 | 
218 | 


--------------------------------------------------------------------------------
/network-topic-models/BayesStack/Models/Topic/CitationInfluenceNoTopics.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE TypeFamilies, GeneralizedNewtypeDeriving, DeriveGeneric, TupleSections, RecordWildCards, TemplateHaskell, RankNTypes, FlexibleContexts #-}
  2 | 
  3 | module BayesStack.Models.Topic.CitationInfluenceNoTopics
  4 |   ( -- * Primitives
  5 |     NetData
  6 |   , dHypers, dArcs, dItems, dNodeItems, dCitingNodes, dCitedNodes
  7 |   , netData
  8 |   , HyperParams(..)
  9 |   , MState
 10 |   , stGammas, stOmegas, stPsis, stCiting, stLambdas
 11 |   , CitingUpdateUnit
 12 |   , ItemSource(..)
 13 |   , CitedNode(..), CitedNodeItem(..)
 14 |   , CitingNode(..), CitingNodeItem(..)
 15 |   , Citing(..), Cited(..)
 16 |   , Item(..), Topic(..), NodeItem(..), Node(..), Arc(..)
 17 |   , setupNodeItems
 18 |     -- * Initialization
 19 |   , verifyNetData, cleanNetData
 20 |   , ModelInit
 21 |   , randomInitialize
 22 |   , model
 23 |   , updateUnits
 24 |     -- * Diagnostics
 25 |   , modelLikelihood
 26 |   ) where
 27 | 
 28 | import qualified Data.Vector as V
 29 | import Statistics.Sample (mean)
 30 | 
 31 | import           Prelude hiding (mapM_, sum)
 32 | import           Data.Maybe (fromMaybe)
 33 | 
 34 | import           Control.Lens hiding (Setting)
 35 | import           Data.Set (Set)
 36 | import qualified Data.Set as S
 37 | 
 38 | import           Data.Map.Strict (Map)
 39 | import qualified Data.Map.Strict as M
 40 | 
 41 | import           Data.Foldable hiding (product)
 42 | import           Control.Applicative ((<$>), (<*>))
 43 | import           Control.Monad (when)
 44 | import           Control.Monad.Trans.State.Strict
 45 | import           Control.Monad.Trans.Writer.Strict
 46 | 
 47 | import           Data.Random
 48 | import           Data.Random.Lift (lift)
 49 | import           Data.Random.Distribution.Categorical (categorical)
 50 | import           Numeric.Log hiding (sum)
 51 | 
 52 | import           BayesStack.Types
 53 | import           BayesStack.Gibbs
 54 | import           BayesStack.DirMulti
 55 | import           BayesStack.TupleEnum ()
 56 | import           BayesStack.Models.Topic.Types
 57 | 
 58 | import           GHC.Generics (Generic)
 59 | import           Data.Binary (Binary)
 60 | import           Control.DeepSeq
 61 | 
 62 | at' :: At m => Index m -> IndexedLens' (Index m) m (IxValue m)
 63 | at' i = at i . _fromMaybe
 64 |   where _fromMaybe = iso (fromMaybe $ error "at': Unexpected Nothing") Just
 65 | 
 66 | data ItemSource = Shared | Own deriving (Show, Eq, Enum, Ord, Generic)
 67 | instance Binary ItemSource
 68 | instance NFData ItemSource
 69 | 
 70 | newtype Citing a = Citing a deriving (Show, Eq, Enum, Ord, Generic, NFData)
 71 | newtype Cited a = Cited a deriving (Show, Eq, Enum, Ord, Generic, NFData)
 72 | instance Binary a => Binary (Citing a)
 73 | instance Binary a => Binary (Cited a)
 74 | 
 75 | type CitingNode = Citing Node
 76 | type CitedNode = Cited Node
 77 | type CitingNodeItem = Citing NodeItem
 78 | type CitedNodeItem = Cited NodeItem
 79 | 
 80 | -- ^ A directed edge
 81 | data Arc = Arc { citingNode :: !CitingNode, citedNode :: !CitedNode }
 82 |             deriving (Show, Eq, Ord, Generic)
 83 | instance Binary Arc
 84 | 
 85 | data HyperParams = HyperParams
 86 |                    { _alphaPsi         :: Double
 87 |                    , _alphaLambda      :: Double
 88 |                    , _alphaOmega       :: Double
 89 |                    , _alphaGammaShared :: Double
 90 |                    , _alphaGammaOwn    :: Double
 91 |                    , _alphaBetaFG      :: Double
 92 |                    , _alphaBetaBG      :: Double
 93 |                    }
 94 |                  deriving (Show, Eq, Generic)
 95 | instance Binary HyperParams
 96 | makeLenses ''HyperParams         
 97 | 
 98 | data NetData = NetData { _dHypers             :: !(HyperParams)
 99 |                        , _dArcs               :: !(Set Arc)
100 |                        , _dItems              :: !(Map Item Double)
101 |                        , _dNodeItems          :: !(Map NodeItem (Node, Item))
102 |                        , _dCitingNodes        :: !(Map CitingNode (Set CitedNode))
103 |                          -- ^ Maps each citing node to the set of nodes cited by it
104 |                        , _dCitedNodes         :: !(Map CitedNode (Set CitingNode))
105 |                          -- ^ Maps each cited node to the set of nodes citing it
106 |                        }
107 |               deriving (Show, Eq, Generic)
108 | instance Binary NetData
109 | makeLenses ''NetData         
110 | 
111 | netData :: HyperParams -> Set Arc -> Map Item Double -> Map NodeItem (Node,Item) -> NetData
112 | netData hypers arcs items nodeItems =
113 |     NetData { _dHypers       = hypers
114 |             , _dArcs         = arcs
115 |             , _dItems        = items
116 |             , _dNodeItems    = nodeItems
117 |             , _dCitingNodes  = M.unionsWith S.union
118 |                                $ map (\(Arc a b)->M.singleton a $ S.singleton b)
119 |                                $ S.toList arcs
120 |             , _dCitedNodes   = M.unionsWith S.union
121 |                                $ map (\(Arc a b)->M.singleton b $ S.singleton a)
122 |                                $ S.toList arcs
123 |             }
124 | 
125 | dCitingNodeItems :: NetData -> Map CitingNodeItem (CitingNode, Item)
126 | dCitingNodeItems nd =
127 |     M.mapKeys Citing
128 |     $ M.map (\(n,i)->(Citing n, i))
129 |     $ M.filter (\(n,i)->Citing n `M.member` (nd^.dCitingNodes))
130 |     $ nd^.dNodeItems
131 | 
132 | itemsOfCitingNode :: NetData -> CitingNode -> [Item]
133 | itemsOfCitingNode d (Citing u) =
134 |     map snd $ M.elems $ M.filter (\(n,_)->n==u) $ d^.dNodeItems
135 | 
136 | connectedNodes :: Set Arc -> Set Node
137 | connectedNodes arcs =
138 |     S.map ((\(Cited n)->n) . citedNode) arcs `S.union` S.map ((\(Citing n)->n) . citingNode) arcs
139 | 
140 | cleanNetData :: NetData -> NetData
141 | cleanNetData d =
142 |     let nodesWithItems = S.fromList $ map fst $ M.elems $ d^.dNodeItems
143 |         nodesWithArcs = connectedNodes $ d^.dArcs
144 |         keptNodes = nodesWithItems `S.intersection` nodesWithArcs
145 |         keepArc (Arc (Citing citing) (Cited cited)) =
146 |             citing `S.member` keptNodes && cited `S.member` keptNodes
147 |         go = do dArcs %= S.filter keepArc
148 |                 dNodeItems %= M.filter (\(n,i)->n `S.member` keptNodes)
149 |     in execState go d
150 | 
151 | verifyNetData :: (Node -> String) -> NetData -> [String]
152 | verifyNetData showNode d = execWriter $ do
153 |     let nodesWithItems = S.fromList $ map fst $ M.elems $ d^.dNodeItems
154 |     forM_ (d^.dArcs) $ \(Arc (Citing citing) (Cited cited))->do
155 |         when (cited `S.notMember` nodesWithItems)
156 |             $ tell [showNode cited++" has arc yet has no items"]
157 |         when (citing `S.notMember` nodesWithItems)
158 |             $ tell [showNode citing++" has arc yet has no items"]
159 | 
160 | -- Citing Update unit (Shared Taste-like)
161 | data CitingUpdateUnit = CitingUpdateUnit { _uuNI    :: CitingNodeItem
162 |                                          , _uuN     :: CitingNode
163 |                                          , _uuX     :: Item
164 |                                          , _uuCites :: Set CitedNode
165 |                                          , _uuItemWeight :: Double
166 |                                          }
167 |                       deriving (Show, Generic)
168 | instance Binary CitingUpdateUnit
169 | makeLenses ''CitingUpdateUnit
170 | 
171 | citingUpdateUnits :: NetData -> [CitingUpdateUnit]
172 | citingUpdateUnits d =
173 |     map (\(ni,(n,x))->CitingUpdateUnit { _uuNI      = ni
174 |                                        , _uuN       = n
175 |                                        , _uuX       = x
176 |                                        , _uuCites   = d^.dCitingNodes . at' n
177 |                                        , _uuItemWeight = (d ^. dItems . at' x)
178 |                                        }
179 |         ) $ M.assocs $ dCitingNodeItems d
180 | 
181 | updateUnits :: NetData -> [WrappedUpdateUnit MState]
182 | updateUnits d = map WrappedUU (citingUpdateUnits d)
183 | 
184 | -- | Model State            
185 | data CitingSetting = OwnSetting
186 |                    | SharedSetting !CitedNode
187 |                    deriving (Show, Eq, Generic)
188 | instance Binary CitingSetting
189 | instance NFData CitingSetting where
190 |     rnf (OwnSetting)      = ()
191 |     rnf (SharedSetting c) = rnf c `seq` ()
192 | 
193 | data MState = MState { -- Citing model state
194 |                        _stGammas   :: !(Map CitingNode (Multinom Int ItemSource))
195 |                      , _stOmegas   :: !(Map CitingNode (Multinom Int Item))
196 |                      , _stPsis     :: !(Map CitingNode (Multinom Int CitedNode))
197 | 
198 |                      , _stCiting   :: !(Map CitingNodeItem CitingSetting)
199 | 
200 |                      -- Cited model state
201 |                      , _stLambdas  :: !(Map CitedNode (Multinom Int Item))
202 |                      }
203 |             deriving (Show, Generic)
204 | instance Binary MState
205 | makeLenses ''MState         
206 | 
207 | -- | Model initialization            
208 | type ModelInit = Map CitingNodeItem (Setting CitingUpdateUnit)
209 | 
210 | modify' :: Monad m => (a -> a) -> StateT a m ()
211 | modify' f = do x <- get
212 |                put $! f x
213 | 
214 | randomInitializeCiting :: NetData -> ModelInit -> RVar ModelInit
215 | randomInitializeCiting d init = execStateT doInit init
216 |     where doInit :: StateT ModelInit RVar ()
217 |           doInit = let unset = M.keysSet (dCitingNodeItems d) `S.difference` M.keysSet init
218 |                    in mapM_ (randomInitCitingUU d) (S.toList unset)
219 | 
220 | randomInitCitingUU :: NetData -> CitingNodeItem -> StateT ModelInit RVar ()
221 | randomInitCitingUU d cni@(Citing ni) =
222 |     let (n,_) = d ^. dNodeItems . at' ni
223 |     in case d ^. dCitingNodes . at' (Citing n) of
224 |            a | S.null a -> do
225 |                modify' $ M.insert cni OwnSetting
226 | 
227 |            citedNodes -> do
228 |                s <- lift $ randomElement [Shared, Own]
229 |                c <- lift $ randomElement $ toList citedNodes
230 |                modify' $ M.insert cni $
231 |                    case s of Shared -> SharedSetting c
232 |                              Own    -> OwnSetting
233 | 
234 | randomInitialize :: NetData -> RVar ModelInit
235 | randomInitialize d = randomInitializeCiting d M.empty
236 | 
237 | model :: NetData -> ModelInit -> MState
238 | model d citingInit =
239 |     let citingNodes = M.keys $ d^.dCitingNodes
240 |         hp = d^.dHypers
241 |         s = MState { -- Citing model
242 |                      _stPsis = let dist n = case d ^. dCitingNodes . at' n . to toList of
243 |                                                 []    -> M.empty
244 |                                                 nodes -> M.singleton n
245 |                                                          $ symDirMulti (hp^.alphaPsi) nodes
246 |                                in foldMap dist citingNodes
247 |                    , _stGammas = let dist = multinom [ (Shared, hp^.alphaGammaShared)
248 |                                                      , (Own, hp^.alphaGammaOwn) ]
249 |                                  in foldMap (\t->M.singleton t dist) citingNodes
250 |                    , _stOmegas = let dist = symDirMulti (hp^.alphaOmega) (M.keys $ d^.dItems)
251 |                                  in foldMap (\t->M.singleton t dist) citingNodes
252 |                    , _stCiting = M.empty
253 | 
254 |                    -- Cited model
255 |                    , _stLambdas = let dist = symDirMulti (hp^.alphaLambda) (M.keys $ d^.dItems)
256 |                                       lambdas0 = foldMap (\n->M.singleton n dist) $ M.keys $ d^.dCitedNodes
257 |                                   in foldl' (\dms (n,x)->M.adjust (incMultinom x) (Cited n) dms) lambdas0 (M.elems $ d^.dNodeItems)
258 |                    }
259 | 
260 |         initCitingUU :: CitingUpdateUnit -> State MState ()
261 |         initCitingUU uu = do
262 |             let err = error $ "CitationInference: Initial value for "++show uu++" not given\n"
263 |                 s = maybe err id $ M.lookup (uu^.uuNI) citingInit
264 |             modify' $ setCitingUU uu (Just s)
265 | 
266 |     in execState (mapM_ initCitingUU $ citingUpdateUnits d) s
267 | 
268 | modelLikelihood :: MState -> Probability
269 | modelLikelihood model =
270 |     product $ (model ^.. stGammas  . folded . to likelihood)
271 |            ++ (model ^.. stLambdas . folded . to likelihood)
272 |            ++ (model ^.. stOmegas  . folded . to likelihood)
273 |            ++ (model ^.. stPsis    . folded . to likelihood)
274 | 
275 | instance UpdateUnit CitingUpdateUnit where
276 |     type ModelState CitingUpdateUnit = MState
277 |     type Setting CitingUpdateUnit = CitingSetting
278 |     fetchSetting uu ms = ms ^. stCiting . at' (uu^.uuNI)
279 |     evolveSetting ms uu = categorical $ citingFullCond (setCitingUU uu Nothing ms) uu
280 |     updateSetting uu _ s' = setCitingUU uu (Just s') . setCitingUU uu Nothing
281 | 
282 | citingProb :: MState -> CitingUpdateUnit -> Setting CitingUpdateUnit -> Double
283 | citingProb st (CitingUpdateUnit {_uuN=n, _uuX=x}) setting =
284 |     let gamma = st ^. stGammas . at' n
285 |         omega = st ^. stOmegas . at' n
286 |         psi = st ^. stPsis . at' n
287 |     in case setting of
288 |         SharedSetting c   -> let lambda = st ^. stLambdas . at' c
289 |                              in sampleProb gamma Shared
290 |                               * sampleProb psi c
291 |                               * sampleProb lambda x
292 |         OwnSetting        ->  sampleProb gamma Own
293 |                             * sampleProb omega x
294 | 
295 | citingFullCond :: MState -> CitingUpdateUnit -> [(Double, Setting CitingUpdateUnit)]
296 | citingFullCond ms uu = map (\s->(citingProb ms uu s, s)) $ citingDomain ms uu
297 | 
298 | citingDomain :: MState -> CitingUpdateUnit -> [Setting CitingUpdateUnit]
299 | citingDomain ms uu = do
300 |     s <- [Own, Shared]
301 |     case s of
302 |         Shared -> do c <- uu ^. uuCites . to S.toList
303 |                      return $ SharedSetting c
304 |         Own    -> do return $ OwnSetting
305 | 
306 | setCitingUU :: CitingUpdateUnit -> Maybe (Setting CitingUpdateUnit) -> MState -> MState
307 | setCitingUU uu@(CitingUpdateUnit {_uuNI=ni, _uuN=n, _uuX=x}) setting ms = execState go ms
308 |   where
309 |     set = maybe Unset (const Set) setting
310 |     go = case maybe (fetchSetting uu ms) id setting of
311 |            SharedSetting c    -> do stPsis .    at' n %= setMultinom set c
312 |                                     stLambdas . at' c %= setMultinom set x
313 |                                     stGammas .  at' n %= setMultinom set Shared
314 |                                     stCiting .  at ni .= setting
315 | 
316 |            OwnSetting         -> do stOmegas .  at' n %= setMultinom set x
317 |                                     stGammas .  at' n %= setMultinom set Own
318 |                                     stCiting .  at ni .= setting
319 |   


--------------------------------------------------------------------------------
/network-topic-models/BayesStack/Models/Topic/LDA.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE TypeFamilies, GeneralizedNewtypeDeriving, DeriveGeneric, RecordWildCards #-}
  2 | 
  3 | module BayesStack.Models.Topic.LDA
  4 |   ( -- * Primitives
  5 |     NetData(..)
  6 |   , HyperParams(..)
  7 |   , MState(..)
  8 |   , LDAUpdateUnit
  9 |   , Node(..), Item(..), Topic(..)
 10 |   , NodeItem(..), setupNodeItems
 11 |     -- * Initialization
 12 |   , ModelInit
 13 |   , randomInitialize
 14 |   , model, updateUnits
 15 |     -- * Hyperparameter estimation
 16 |   , reestimate, reestimatePhis, reestimateThetas
 17 |     -- * Diagnostics
 18 |   , modelLikelihood
 19 |   ) where
 20 | 
 21 | import Prelude hiding (mapM_)
 22 | 
 23 | import Data.Set (Set)
 24 | import qualified Data.Set as S
 25 | 
 26 | import Data.Map.Strict (Map)
 27 | import qualified Data.Map.Strict as M
 28 | 
 29 | import Data.Traversable
 30 | import Data.Foldable hiding (product)
 31 | import Data.Monoid
 32 | 
 33 | import Control.Monad (liftM)
 34 | import Control.Monad.Trans.State.Strict
 35 | import Data.Random
 36 | import Data.Random.Distribution.Categorical (categorical)
 37 | 
 38 | import BayesStack.Types
 39 | import BayesStack.Gibbs
 40 | import BayesStack.DirMulti
 41 | import BayesStack.TupleEnum ()
 42 | import BayesStack.Models.Topic.Types
 43 | 
 44 | import GHC.Generics
 45 | import Data.Binary (Binary)
 46 | 
 47 | data HyperParams = HyperParams
 48 |                    { alphaTheta       :: Double
 49 |                    , alphaPhi         :: Double
 50 |                    }
 51 |                  deriving (Show, Eq, Generic)
 52 | instance Binary HyperParams     
 53 | 
 54 | data NetData = NetData { dHypers   :: !HyperParams
 55 |                        , dNodes :: !(Set Node)
 56 |                        , dItems :: !(Set Item)
 57 |                        , dTopics :: !(Set Topic)
 58 |                        , dNodeItems :: !(Map NodeItem (Node, Item))
 59 |                        }
 60 |                deriving (Show, Eq, Generic)
 61 | instance Binary NetData
 62 | 
 63 | type ModelInit = Map NodeItem Topic
 64 | 
 65 | randomInitialize' :: NetData -> ModelInit -> RVar ModelInit
 66 | randomInitialize' d init =
 67 |   let unset = M.keysSet (dNodeItems d) `S.difference` M.keysSet init
 68 |       topics = S.toList $ dTopics d
 69 |       randomInit :: NodeItem -> RVar ModelInit
 70 |       randomInit ni = liftM (M.singleton ni) $ randomElement topics
 71 |   in liftM mconcat $ forM (S.toList unset) randomInit
 72 | 
 73 | randomInitialize :: NetData -> RVar ModelInit
 74 | randomInitialize = (flip randomInitialize') M.empty
 75 | 
 76 | updateUnits :: NetData -> [WrappedUpdateUnit MState]
 77 | updateUnits = map WrappedUU . updateUnits'
 78 | 
 79 | updateUnits' :: NetData -> [LDAUpdateUnit]
 80 | updateUnits' =
 81 |     map (\(ni,(n,x))->LDAUpdateUnit {uuNI=ni, uuN=n, uuX=x}) . M.assocs . dNodeItems
 82 | 
 83 | model :: NetData -> ModelInit -> MState
 84 | model d init =
 85 |     let uus = updateUnits' d
 86 |         s = MState { stThetas = foldMap (\n->M.singleton n (symDirMulti alphaTheta (toList $ dTopics d)))
 87 |                                 $ dNodes d
 88 |                    , stPhis = foldMap (\t->M.singleton t (symDirMulti alphaPhi (toList $ dItems d)))
 89 |                               $ dTopics d
 90 |                    , stT = M.empty
 91 |                    }
 92 |         HyperParams {..} = dHypers d
 93 |     in execState (mapM_ (\uu->modify' $ setUU uu (Just $ M.findWithDefault (Topic 0) (uuNI uu) init)) uus) s
 94 | 
 95 | modify' :: (s -> s) -> State s ()
 96 | modify' f = do
 97 |     a <- get
 98 |     put $! f a
 99 | 
100 | data MState = MState { stThetas :: !(Map Node (Multinom Int Topic))
101 |                      , stPhis   :: !(Map Topic (Multinom Int Item))
102 |                      , stT      :: !(Map NodeItem Topic)
103 |                      }
104 |             deriving (Show, Generic)
105 | instance Binary MState
106 | 
107 | data LDAUpdateUnit = LDAUpdateUnit { uuNI :: NodeItem
108 |                                    , uuN  :: Node
109 |                                    , uuX  :: Item
110 |                                    }
111 |                    deriving (Show, Generic)
112 | instance Binary LDAUpdateUnit
113 | 
114 | setUU :: LDAUpdateUnit -> Maybe Topic -> MState -> MState
115 | setUU uu@(LDAUpdateUnit {uuN=n, uuNI=ni, uuX=x}) setting ms =
116 |     let t = maybe (fetchSetting uu ms) id setting
117 |         set = maybe Unset (const Set) setting
118 |     in ms { stPhis = M.adjust (setMultinom set x) t (stPhis ms)
119 |           , stThetas = M.adjust (setMultinom set t) n (stThetas ms)
120 |           , stT = case setting of Just _  -> M.insert ni t $ stT ms
121 |                                   Nothing -> stT ms
122 |           }
123 | 
124 | instance UpdateUnit LDAUpdateUnit where
125 |     type ModelState LDAUpdateUnit = MState
126 |     type Setting LDAUpdateUnit = Topic
127 |     fetchSetting (LDAUpdateUnit {uuNI=ni}) ms = stT ms M.! ni
128 |     evolveSetting ms uu = categorical $ ldaFullCond (setUU uu Nothing ms) uu
129 |     updateSetting uu _ s' = setUU uu (Just s') . setUU uu Nothing
130 | 
131 | uuProb :: MState -> LDAUpdateUnit -> Topic -> Double
132 | uuProb state (LDAUpdateUnit {uuN=n, uuX=x}) t =
133 |     let theta = stThetas state M.! n
134 |         phi = stPhis state M.! t
135 |     in realToFrac $ sampleProb theta t * sampleProb phi x
136 | 
137 | ldaFullCond :: MState -> LDAUpdateUnit -> [(Double, Topic)]
138 | ldaFullCond ms uu = do
139 |     t <- uuDomain ms uu
140 |     return (uuProb ms uu t, t)
141 | 
142 | uuDomain :: MState -> LDAUpdateUnit -> [Topic]
143 | uuDomain ms uu = M.keys $ stPhis ms
144 | 
145 | modelLikelihood :: MState -> Probability
146 | modelLikelihood model =
147 |     product (map likelihood $ M.elems $ stThetas model)
148 |   * product (map likelihood $ M.elems $ stPhis model)
149 | 
150 | -- | Re-estimate phi hyperparameter
151 | reestimatePhis :: MState -> MState
152 | reestimatePhis ms = ms { stPhis = reestimateSymPriors $ stPhis ms }
153 | 
154 | -- | Re-estimate theta hyperparameter
155 | reestimateThetas :: MState -> MState
156 | reestimateThetas ms = ms { stThetas = reestimateSymPriors $ stThetas ms }
157 | 
158 | reestimate :: MState -> MState
159 | reestimate = reestimatePhis . reestimateThetas
160 | 


--------------------------------------------------------------------------------
/network-topic-models/BayesStack/Models/Topic/LDARelevance.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE TypeFamilies, GeneralizedNewtypeDeriving, DeriveGeneric, FlexibleInstances, RecordWildCards #-}
  2 | 
  3 | module BayesStack.Models.Topic.LDARelevance
  4 |   ( -- * Primitives
  5 |     NetData(..)
  6 |   , HyperParams(..)
  7 |   , MState(..)
  8 |   , LDAUpdateUnit
  9 |   , ItemWeight
 10 |   , Node(..), Item(..), Topic(..)
 11 |   , NodeItem(..), setupNodeItems
 12 |     -- * Initialization
 13 |   , ModelInit
 14 |   , randomInitialize
 15 |   , model, updateUnits
 16 |     -- * Hyperparameter estimation
 17 |   , reestimate, reestimatePhis, reestimateThetas
 18 |     -- * Diagnostics
 19 |   , modelLikelihood
 20 |   ) where
 21 | 
 22 | import Prelude hiding (mapM)
 23 | 
 24 | import Data.Set (Set)
 25 | import qualified Data.Set as S
 26 | 
 27 | import Data.Map.Strict (Map)
 28 | import qualified Data.Map.Strict as M
 29 | 
 30 | import Data.Traversable
 31 | import Data.Foldable hiding (product)
 32 | import Data.Monoid
 33 | 
 34 | import Control.Monad (liftM)
 35 | import Control.Monad.Trans.State
 36 | import Data.Random
 37 | import Data.Random.Distribution.Categorical (categorical)
 38 | 
 39 | import BayesStack.Types
 40 | import BayesStack.Gibbs
 41 | import BayesStack.DirMulti
 42 | import BayesStack.TupleEnum ()
 43 | import BayesStack.Models.Topic.Types
 44 | 
 45 | import GHC.Generics
 46 | import Data.Binary as B
 47 | import Data.Fixed
 48 | 
 49 | type ItemWeight = Micro
 50 | 
 51 | data HyperParams = HyperParams
 52 |                    { alphaTheta       :: Double
 53 |                    , alphaPhi         :: Double
 54 |                    }
 55 |                  deriving (Show, Eq, Generic)
 56 | instance Binary HyperParams     
 57 | 
 58 | data NetData = NetData { dHypers :: !HyperParams
 59 |                        , dNodes :: !(Set Node)
 60 |                        , dItems :: !(Map Item ItemWeight)
 61 |                        , dTopics :: !(Set Topic)
 62 |                        , dNodeItems :: !(Map NodeItem (Node, Item))
 63 |                        }
 64 |                deriving (Show, Eq, Generic)
 65 | instance Binary NetData
 66 | 
 67 | type ModelInit = Map NodeItem Topic
 68 | 
 69 | randomInitialize' :: NetData -> ModelInit -> RVar ModelInit
 70 | randomInitialize' d init =
 71 |   let unset = M.keysSet (dNodeItems d) `S.difference` M.keysSet init
 72 |       topics = S.toList $ dTopics d
 73 |       randomInit :: NodeItem -> RVar ModelInit
 74 |       randomInit ni = liftM (M.singleton ni) $ randomElement topics
 75 |   in liftM mconcat $ forM (S.toList unset) randomInit
 76 | 
 77 | randomInitialize :: NetData -> RVar ModelInit
 78 | randomInitialize = (flip randomInitialize') M.empty
 79 | 
 80 | updateUnits :: NetData -> [WrappedUpdateUnit MState]
 81 | updateUnits = map WrappedUU . updateUnits'
 82 | 
 83 | updateUnits' :: NetData -> [LDAUpdateUnit]
 84 | updateUnits' nd =
 85 |     map (\(ni,(n,x))->LDAUpdateUnit { uuNI=ni, uuN=n, uuX=x
 86 |                                     , uuW=dItems nd M.! x
 87 |                                     })
 88 |     $ M.assocs $ dNodeItems nd
 89 | 
 90 | model :: NetData -> ModelInit -> MState
 91 | model d init =
 92 |     let uus = updateUnits' d
 93 |         s = MState { stThetas = foldMap (\n->M.singleton n (symDirMulti alphaTheta (toList $ dTopics d)))
 94 |                                 $ dNodes d
 95 |                    , stPhis = foldMap (\t->M.singleton t (symDirMulti alphaPhi (M.keys $ dItems d)))
 96 |                               $ dTopics d
 97 |                    , stT = M.empty
 98 |                    }
 99 |         HyperParams {..} = dHypers d
100 |     in execState (mapM (\uu->modify $ setUU uu (Just $ M.findWithDefault (Topic 0) (uuNI uu) init)) uus) s
101 | 
102 | data MState = MState { stThetas :: !(Map Node (Multinom Int Topic))
103 |                      , stPhis   :: !(Map Topic (Multinom ItemWeight Item))
104 |                      , stT      :: !(Map NodeItem Topic)
105 |                      }
106 |             deriving (Show, Generic)
107 | instance Binary MState
108 | 
109 | data LDAUpdateUnit = LDAUpdateUnit { uuNI :: NodeItem
110 |                                    , uuN  :: Node
111 |                                    , uuX  :: Item
112 |                                    , uuW  :: ItemWeight
113 |                                    }
114 |                    deriving (Show, Generic)
115 | instance Binary LDAUpdateUnit
116 | 
117 | instance Binary (Fixed E6) where
118 |     get = do a <- B.get :: Get Int
119 |              return $ fromIntegral a / 1000000
120 |     put = (B.put :: Int -> Put) . round . (*1000000)
121 | 
122 | setUU :: LDAUpdateUnit -> Maybe Topic -> MState -> MState
123 | setUU uu@(LDAUpdateUnit {uuN=n, uuNI=ni, uuX=x, uuW=w}) setting ms =
124 |     let t = maybe (fetchSetting uu ms) id setting
125 |         set = maybe Unset (const Set) setting
126 |         setPhi = case setting of
127 |                      Just _  -> addMultinom w x
128 |                      Nothing -> subMultinom w x
129 |     in ms { stPhis = M.adjust setPhi t (stPhis ms)
130 |           , stThetas = M.adjust (setMultinom set t) n (stThetas ms)
131 |           , stT = case setting of Just _  -> M.insert ni t $ stT ms
132 |                                   Nothing -> stT ms
133 |           }
134 | 
135 | instance UpdateUnit LDAUpdateUnit where
136 |     type ModelState LDAUpdateUnit = MState
137 |     type Setting LDAUpdateUnit = Topic
138 |     fetchSetting (LDAUpdateUnit {uuNI=ni}) ms = stT ms M.! ni
139 |     evolveSetting ms uu = categorical $ ldaFullCond (setUU uu Nothing ms) uu
140 |     updateSetting uu _ s' = setUU uu (Just s') . setUU uu Nothing
141 | 
142 | uuProb :: MState -> LDAUpdateUnit -> Topic -> Double
143 | uuProb state (LDAUpdateUnit {uuN=n, uuX=x}) t =
144 |     let theta = stThetas state M.! n
145 |         phi = stPhis state M.! t
146 |     in realToFrac $ sampleProb theta t * sampleProb phi x
147 | 
148 | ldaFullCond :: MState -> LDAUpdateUnit -> [(Double, Topic)]
149 | ldaFullCond ms uu = do
150 |     t <- uuDomain ms uu
151 |     return (uuProb ms uu t, t)
152 | 
153 | uuDomain :: MState -> LDAUpdateUnit -> [Topic]
154 | uuDomain ms uu = M.keys $ stPhis ms
155 | 
156 | modelLikelihood :: MState -> Probability
157 | modelLikelihood model =
158 |     product (map likelihood $ M.elems $ stThetas model)
159 |   * product (map likelihood $ M.elems $ stPhis model)
160 | 
161 | -- | Re-estimate phi hyperparameter
162 | reestimatePhis :: MState -> MState
163 | reestimatePhis ms = ms { stPhis = reestimateSymPriors $ stPhis ms }
164 | 
165 | -- | Re-estimate theta hyperparameter
166 | reestimateThetas :: MState -> MState
167 | reestimateThetas ms = ms { stThetas = reestimateSymPriors $ stThetas ms }
168 | 
169 | reestimate :: MState -> MState
170 | reestimate = reestimatePhis . reestimateThetas
171 | 


--------------------------------------------------------------------------------
/network-topic-models/BayesStack/Models/Topic/SharedTaste.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE TypeFamilies, GeneralizedNewtypeDeriving, DeriveGeneric, RecordWildCards #-}
  2 | 
  3 | module BayesStack.Models.Topic.SharedTaste
  4 |   ( -- * Primitives
  5 |     NetData(..)
  6 |   , HyperParams(..)
  7 |   , MState(..)
  8 |   , STUpdateUnit
  9 |   , ItemSource(..)
 10 |   , Node(..), Item(..), Topic(..), Edge(..)
 11 |   , NodeItem(..), setupNodeItems
 12 |     -- * Initialization
 13 |   , ModelInit
 14 |   , randomInitialize
 15 |   , model, updateUnits
 16 |     -- * Diagnostics
 17 |   , modelLikelihood
 18 |   , influence
 19 |   ) where
 20 | 
 21 | import Prelude hiding (mapM, sum)
 22 | 
 23 | import Data.Set (Set)
 24 | import qualified Data.Set as S
 25 | 
 26 | import Data.Map.Strict (Map)
 27 | import qualified Data.Map.Strict as M
 28 | 
 29 | import Data.Traversable
 30 | import Data.Foldable hiding (product)
 31 | import Data.Monoid
 32 | 
 33 | import Control.DeepSeq
 34 | import Control.Monad (liftM)
 35 | import Control.Monad.Trans.State
 36 | import Data.Random
 37 | import Data.Random.Distribution.Categorical (categorical)
 38 | import Numeric.Log hiding (sum)
 39 | 
 40 | import BayesStack.Types
 41 | import BayesStack.Gibbs
 42 | import BayesStack.DirMulti
 43 | import BayesStack.TupleEnum ()
 44 | import BayesStack.Models.Topic.Types
 45 | 
 46 | import GHC.Generics
 47 | import Data.Binary
 48 | 
 49 | data ItemSource = Shared | Own
 50 |                 deriving (Show, Eq, Generic, Enum, Ord)
 51 | instance Binary ItemSource
 52 | instance NFData ItemSource
 53 | 
 54 | data HyperParams = HyperParams
 55 |                    { alphaPsi         :: Double
 56 |                    , alphaLambda      :: Double
 57 |                    , alphaPhi         :: Double
 58 |                    , alphaOmega       :: Double
 59 |                    , alphaGammaShared :: Double
 60 |                    , alphaGammaOwn    :: Double
 61 |                    }
 62 |                  deriving (Show, Eq, Generic)
 63 | instance Binary HyperParams     
 64 | 
 65 | data NetData = NetData { dHypers             :: !HyperParams
 66 |                        , dEdges              :: !(Set Edge)
 67 |                        , dItems              :: !(Set Item)
 68 |                        , dTopics             :: !(Set Topic)
 69 |                        , dNodeItems          :: !(Map NodeItem (Node, Item))
 70 |                        }
 71 |              deriving (Show, Eq, Generic)
 72 | instance Binary NetData
 73 | 
 74 | dNodes :: NetData -> Set Node
 75 | dNodes = S.fromList . map fst . M.elems . dNodeItems
 76 | 
 77 | dAdjNodes :: NetData -> Node -> Set Node
 78 | dAdjNodes nd n = S.fromList $ getFriends (S.toList $ dEdges nd) n
 79 | 
 80 | dItemsOfNode :: NetData -> Node -> [Item]
 81 | dItemsOfNode nd u = map snd $ filter (\(n,_)->u==n) $ M.elems $ dNodeItems nd
 82 | 
 83 | type ModelInit = Map NodeItem (Setting STUpdateUnit)
 84 | 
 85 | randomInit :: NetData -> Map Node (Set Node) -> NodeItem -> RVar ModelInit
 86 | randomInit d friends ni = do
 87 |     let topics = S.toList $ dTopics d
 88 |     t <- randomElement topics
 89 |     s <- randomElement [Shared, Own]
 90 |     let (u,_) = dNodeItems d M.! ni
 91 |     f <- randomElement $ S.toList
 92 |          $ maybe (error "SharedTaste.randomInit: No friends") id $ M.lookup u friends
 93 |     return $ M.singleton ni $
 94 |         case s of Shared -> SharedSetting t (Edge (u,f))
 95 |                   Own    -> OwnSetting t
 96 | 
 97 | randomInitialize' :: NetData -> ModelInit -> RVar ModelInit
 98 | randomInitialize' d init =
 99 |   let unset = M.keysSet (dNodeItems d) `S.difference` M.keysSet init
100 |       friends = M.unionsWith S.union $ map (\(Edge (a,b))-> M.singleton a (S.singleton b)
101 |                                                          <> M.singleton b (S.singleton a)
102 |                                            ) $ S.toList $ dEdges d
103 |   in liftM mconcat $ forM (S.toList unset) $ randomInit d friends
104 | 
105 | 
106 | randomInitialize :: NetData -> RVar ModelInit
107 | randomInitialize = (flip randomInitialize') M.empty
108 | 
109 | updateUnits' :: NetData -> [STUpdateUnit]
110 | updateUnits' d =
111 |     map (\(ni,(n,x)) ->
112 |                STUpdateUnit { uuNI      = ni
113 |                             , uuN       = n
114 |                             , uuX       = x
115 |                             , uuFriends = getFriends (S.toList $ dEdges d) n
116 |                             }
117 |         )
118 |     $ M.assocs $ dNodeItems d
119 | 
120 | updateUnits :: NetData -> [WrappedUpdateUnit MState]
121 | updateUnits = map WrappedUU . updateUnits'
122 | 
123 | model :: NetData -> ModelInit -> MState
124 | model d init =
125 |     let uus = updateUnits' d
126 |         s = MState { stPsis = let dist n = symDirMulti alphaPsi (toList $ getFriends (toList $ dEdges d) n)
127 |                                in foldMap (\n->M.singleton n $ dist n) $ dNodes d
128 |                     , stPhis = let dist = symDirMulti alphaPhi (toList $ dItems d)
129 |                                in foldMap (\t->M.singleton t dist) $ dTopics d
130 |                     , stGammas = let dist = multinom [ (Shared, alphaGammaShared)
131 |                                                      , (Own, alphaGammaOwn) ]
132 |                                  in foldMap (\t->M.singleton t dist) $ dNodes d
133 |                     , stOmegas = let dist = symDirMulti alphaOmega (toList $ dTopics d)
134 |                                  in foldMap (\t->M.singleton t dist) $ dNodes d
135 |                     , stLambdas = let dist = symDirMulti alphaLambda (toList $ dTopics d)
136 |                                   in foldMap (\t->M.singleton t dist) $ dEdges d
137 |                     , stVars = M.empty
138 |                     }
139 |         HyperParams {..} = dHypers d
140 |         initUU uu = do
141 |             let s = maybe (error "Incomplete initialization") id $ M.lookup (uuNI uu) init
142 |             modify $ setUU uu (Just s)
143 |     in execState (mapM initUU uus) s
144 | 
145 | data STSetting = OwnSetting !Topic
146 |                | SharedSetting !Topic !Edge
147 |                deriving (Show, Eq, Generic)
148 | 
149 | instance Binary STSetting
150 | instance NFData STSetting where
151 |     rnf (OwnSetting t)      = rnf t `seq` ()
152 |     rnf (SharedSetting t f) = rnf t `seq` rnf f `seq` ()
153 | 
154 | data MState = MState { stGammas   :: !(Map Node (Multinom Int ItemSource))
155 |                      , stOmegas   :: !(Map Node (Multinom Int Topic))
156 |                      , stPsis     :: !(Map Node (Multinom Int Node))
157 |                      , stLambdas  :: !(Map Edge (Multinom Int Topic))
158 |                      , stPhis     :: !(Map Topic (Multinom Int Item))
159 | 
160 |                      , stVars     :: !(Map NodeItem STSetting)
161 |                      }
162 |             deriving (Show, Generic)
163 | instance Binary MState
164 | 
165 | data STUpdateUnit = STUpdateUnit { uuNI      :: NodeItem
166 |                                  , uuN       :: Node
167 |                                  , uuX       :: Item
168 |                                  , uuFriends :: [Node]
169 |                                  }
170 |                    deriving (Show, Generic)
171 | instance Binary STUpdateUnit
172 | 
173 | setUU :: STUpdateUnit -> Maybe (Setting STUpdateUnit) -> MState -> MState
174 | setUU uu@(STUpdateUnit {uuNI=ni, uuN=n, uuX=x}) setting ms =
175 |     let set = maybe Unset (const Set) setting
176 |         ms' = case maybe (fetchSetting uu ms) id  setting of
177 |             SharedSetting t fship ->
178 |                 let f = maybe (error "Node isn't part of friendship") id
179 |                         $ otherFriend n fship
180 |                 in ms { stPsis = M.adjust (setMultinom set n) f
181 |                                  $ M.adjust (setMultinom set f) n $ stPsis ms
182 |                       , stLambdas = M.adjust (setMultinom set t) fship $ stLambdas ms
183 |                       , stPhis = M.adjust (setMultinom set x) t $ stPhis ms
184 |                       , stGammas = M.adjust (setMultinom set Shared) n $ stGammas ms
185 |                       }
186 |             OwnSetting t ->
187 |                 ms { stOmegas = M.adjust (setMultinom set t) n $ stOmegas ms
188 |                    , stPhis = M.adjust (setMultinom set x) t $ stPhis ms
189 |                    , stGammas = M.adjust (setMultinom set Own) n $ stGammas ms
190 |                    }
191 |     in ms' { stVars = M.alter (const setting) ni $ stVars ms' }
192 | 
193 | instance UpdateUnit STUpdateUnit where
194 |     type ModelState STUpdateUnit = MState
195 |     type Setting STUpdateUnit = STSetting
196 |     fetchSetting uu ms = stVars ms M.! uuNI uu
197 |     evolveSetting ms uu = categorical $ stFullCond (setUU uu Nothing ms) uu
198 |     updateSetting uu _ s' = setUU uu (Just s') . setUU uu Nothing
199 | 
200 | uuProb :: MState -> STUpdateUnit -> Setting STUpdateUnit -> Double
201 | uuProb st (STUpdateUnit {uuN=n, uuX=x}) setting =
202 |     let gamma = stGammas st M.! n
203 |         omega = stOmegas st M.! n
204 |         psi = stPsis st M.! n
205 |     in case setting of
206 |         SharedSetting t fship -> let phi = stPhis st M.! t
207 |                                      lambda = stLambdas st M.! fship
208 |                                      f = maybe (error "Friend isn't friends with node") id
209 |                                          $ otherFriend n fship
210 |                                  in sampleProb gamma Shared
211 |                                   * sampleProb psi f
212 |                                   * sampleProb lambda t
213 |                                   * sampleProb phi x
214 |         OwnSetting t -> let phi = stPhis st M.! t
215 |                         in sampleProb gamma Own
216 |                          * sampleProb omega t
217 |                          * sampleProb phi x
218 | 
219 | stFullCond :: MState -> STUpdateUnit -> [(Double, Setting STUpdateUnit)]
220 | stFullCond ms uu = map (\s->(uuProb ms uu s, s)) $ stDomain ms uu
221 | 
222 | stDomain :: MState -> STUpdateUnit -> [Setting STUpdateUnit]
223 | stDomain ms uu = do
224 |     s <- [Own, Shared]
225 |     t <- M.keys $ stPhis ms
226 |     case s of
227 |         Shared -> do f <- uuFriends uu
228 |                      return $ SharedSetting t (Edge (uuN uu, f))
229 |         Own    -> do return $ OwnSetting t
230 | 
231 | modelLikelihood :: MState -> Probability
232 | modelLikelihood model =
233 |     product (map likelihood $ M.elems $ stGammas model)
234 |   * product (map likelihood $ M.elems $ stPhis model)
235 |   * product (map likelihood $ M.elems $ stLambdas model)
236 |   * product (map likelihood $ M.elems $ stOmegas model)
237 |   * product (map likelihood $ M.elems $ stPsis model)
238 | 
239 | -- | The probability of a collections of items under a given topic mixture.
240 | topicCompatibility :: MState -> [Item] -> Multinom Int Topic -> Probability
241 | topicCompatibility m items lambda =
242 |     product $ do t <- toList $ dmDomain lambda
243 |                  let phi = stPhis m M.! t
244 |                      itemObs = zip items (repeat 1)
245 |                  return $ realToFrac (sampleProb lambda t) * obsProb phi itemObs
246 | 
247 | topicCompatibilities :: (Functor f, Foldable f)
248 |                      => MState -> [Item] -> f (Multinom Int Topic) -> f Probability
249 | topicCompatibilities m items topics =
250 |     let scores = fmap (topicCompatibility m items) topics
251 |     in fmap (/sum scores) scores
252 | 
253 | -- | The influence of adjacent nodes on a node.
254 | influence :: NetData -> MState -> Node -> Map Node Probability
255 | influence d m u =
256 |     let lambdas = foldMap (\f->M.singleton f $ stLambdas m M.! Edge (u,f))
257 |                   $ dAdjNodes d u
258 |     in topicCompatibilities m (dItemsOfNode d u) lambdas
259 | 


--------------------------------------------------------------------------------
/network-topic-models/BayesStack/Models/Topic/Test.hs:
--------------------------------------------------------------------------------
 1 | import qualified Data.Map as M
 2 | 
 3 | main = do
 4 |     let m :: M.Map Int Int
 5 |         m = M.fromList [ (1,1), (2,2) ]
 6 |     print $ M.findWithDefault 6 1 m
 7 |     print $ M.findWithDefault 6 2 m
 8 | 
 9 |     print $ M.lookup 1 m
10 |     print $ M.lookup 2 m
11 | 


--------------------------------------------------------------------------------
/network-topic-models/BayesStack/Models/Topic/Types.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE TypeFamilies, GeneralizedNewtypeDeriving, DeriveGeneric #-}
 2 | 
 3 | module BayesStack.Models.Topic.Types where
 4 | 
 5 | import Control.DeepSeq
 6 | import GHC.Generics
 7 | import Data.Binary
 8 | import Data.Function
 9 | import Data.Maybe
10 | 
11 | import Data.Map (Map)
12 | import qualified Data.Map as M
13 | 
14 | newtype Node = Node Int deriving (Show, Eq, Ord, Enum, Generic, NFData)
15 | newtype Item = Item Int deriving (Show, Eq, Ord, Enum, Generic, NFData)
16 | newtype Topic = Topic Int deriving (Show, Eq, Ord, Enum, Generic, NFData)
17 | newtype NodeItem = NodeItem Int deriving (Show, Eq, Ord, Enum, Generic, NFData)
18 | newtype Edge = Edge (Node, Node) deriving (Show, Generic, NFData)
19 | 
20 | instance Binary Node
21 | instance Binary Item
22 | instance Binary Topic
23 | instance Binary NodeItem
24 | instance Binary Edge
25 | 
26 | instance Eq Edge where
27 |   (Edge (a,b)) == (Edge (c,d)) = (a == c && b == d) || (a == d && b == c)
28 | instance Enum Edge where
29 |   fromEnum (Edge (a,b)) = let a' = min a b
30 |                               b' = max a b
31 |                           in 2^32 * fromEnum a' + fromEnum b'
32 |   toEnum n = let (na, nb) = n `quotRem` (2^32)
33 |              in Edge (toEnum na, toEnum nb)
34 | instance Ord Edge where
35 |   compare = compare `on` fromEnum
36 | 
37 | otherFriend :: Node -> Edge -> Maybe Node
38 | otherFriend u (Edge (a,b))
39 |   | u == a     = Just b
40 |   | u == b     = Just a
41 |   | otherwise  = Nothing
42 | 
43 | isFriend :: Node -> Edge -> Bool
44 | isFriend u fs = isJust $ otherFriend u fs
45 | 
46 | getFriends :: [Edge] -> Node -> [Node]
47 | getFriends fs u = mapMaybe (otherFriend u) fs
48 | 
49 | setupNodeItems :: [(Node,Item)] -> Map NodeItem (Node, Item)
50 | setupNodeItems nodeItems = M.fromList $ zipWith (\idx (n,i)->(NodeItem idx, (n,i))) [0..] nodeItems
51 | 


--------------------------------------------------------------------------------
/network-topic-models/BenchLDA.hs:
--------------------------------------------------------------------------------
 1 | module BenchLDA where
 2 | 
 3 | import           Control.Monad.Trans.State
 4 | import           Control.Monad (replicateM, forM)
 5 | import           Control.Applicative ((<$>))
 6 | import           Text.Printf
 7 | import           Control.Concurrent (setNumCapabilities)
 8 | 
 9 | import           Criterion
10 | import           Data.Random
11 | 
12 | import qualified Data.Set as S
13 | import           BayesStack.Core.Gibbs
14 | import           BayesStack.Models.Topic.LDA
15 | 
16 | data NetParams = NetParams { nNodes        :: Int
17 |                            , nItems        :: Int
18 |                            , nTopics       :: Int
19 |                            , nItemsPerNode :: Int
20 |                            }
21 |                deriving (Show, Eq, Ord)
22 | 
23 | netParams = NetParams { nNodes = 50000
24 |                       , nItems = nItemsPerNode netParams * nNodes netParams `div` 10
25 |                       , nTopics = 100
26 |                       , nItemsPerNode = 200
27 |                       }
28 | 
29 | randomNetwork :: NetParams -> RVar NetData
30 | randomNetwork net = do
31 |     let nodes = [Node i | i <- [1..nNodes net]]
32 |         items = [Item i | i <- [1..nItems net]]
33 |         nodeItem = do node <- randomElement nodes
34 |                       item <- randomElement items
35 |                       return (node, item)
36 |     edges <- replicateM (nItemsPerNode net) nodeItem
37 |     return $! NetData { dAlphaTheta = 0.1
38 |                       , dAlphaPhi   = 0.1
39 |                       , dNodes      = S.fromList nodes
40 |                       , dItems      = S.fromList items
41 |                       , dTopics     = S.fromList [Topic i | i <- [1..nTopics net]]
42 |                       , dNodeItems  = setupNodeItems edges
43 |                       }
44 | 
45 | benchmarksForNetwork :: NetParams -> NetData -> ModelInit -> [Benchmark]
46 | benchmarksForNetwork np net init = do
47 |     let sweeps = 100
48 |     updateBlock <- [10, 100, 1000]
49 |     threads <- [1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26]
50 |     let name = printf "%d topics, %d threads, %d block, %d items per node" (nTopics np) threads updateBlock (nItemsPerNode np)
51 |     return $ bench name $ do
52 |         setNumCapabilities threads
53 |         gibbsUpdate threads updateBlock (model net init)
54 |             $ concat $ replicate sweeps (updateUnits net)
55 | 
56 | benchmarksForNetParams :: NetParams -> RVar [Benchmark]
57 | benchmarksForNetParams np = do
58 |     net <- randomNetwork np
59 |     init <- randomInitialize net
60 |     return $ benchmarksForNetwork np net init
61 | 
62 | ldaBenchmarkParams :: RVar [[Benchmark]]
63 | ldaBenchmarkParams =
64 |     mapM benchmarksForNetParams
65 |     $ do topics <- [100, 500, 1000]
66 |          return netParams {nTopics=topics}
67 | 
68 | ldaBenchmarks :: RVar Benchmark
69 | ldaBenchmarks = bgroup "LDA" . concat <$> ldaBenchmarkParams
70 | 


--------------------------------------------------------------------------------
/network-topic-models/BenchST.hs:
--------------------------------------------------------------------------------
 1 | module BenchST where
 2 | 
 3 | import           Control.Monad.Trans.State
 4 | import           Control.Monad (replicateM, forM, guard)
 5 | import           Control.Applicative ((<$>))
 6 | import           Text.Printf
 7 | import           Control.Concurrent (setNumCapabilities)
 8 | 
 9 | import           Criterion
10 | import           Data.Random
11 | 
12 | import qualified Data.Set as S
13 | import           Data.List ((\\))
14 | import           BayesStack.Core.Gibbs
15 | import           BayesStack.Models.Topic.SharedTaste
16 | 
17 | data NetParams = NetParams { nNodes        :: Int
18 |                            , nEdgesPerNode :: Int
19 |                            , nItems        :: Int
20 |                            , nTopics       :: Int
21 |                            , nItemsPerNode :: Int
22 |                            }
23 | 
24 | netParams = NetParams { nNodes = 5000
25 |                       , nEdgesPerNode = 10
26 |                       , nItems = nItemsPerNode netParams * nNodes netParams `div` 10
27 |                       , nTopics = 100
28 |                       , nItemsPerNode = 20
29 |                       }
30 | 
31 | randomNetwork :: NetParams -> RVar NetData
32 | randomNetwork net = do
33 |     let nodes = [Node i | i <- [1..nNodes net]]
34 |         items = [Item i | i <- [1..nItems net]]
35 |         edge a = do b <- randomElement nodes --(nodes \\ [a])
36 |                     return $ Edge (a,b)
37 |         nodeItem = do node <- randomElement nodes
38 |                       item <- randomElement items
39 |                       return (node, item)
40 |     edges <- concat <$> forM nodes (replicateM (nEdgesPerNode net) . edge)
41 |     nodeItems <- replicateM (nItemsPerNode net) nodeItem
42 |     return $ NetData { dAlphaLambda= 0.1
43 |                      , dAlphaPhi   = 0.1
44 |                      , dAlphaPsi   = 0.01
45 |                      , dAlphaOmega = 0.1
46 |                      , dAlphaGammaShared = 0.9
47 |                      , dAlphaGammaOwn = 0.1
48 |                      , dEdges      = S.fromList edges
49 |                      , dItems      = S.fromList items
50 |                      , dTopics     = S.fromList [Topic i | i <- [1..nTopics net]]
51 |                      , dNodeItems  = setupNodeItems nodeItems
52 |                      }
53 | 
54 | data STBenchmark = STBenchmark { bNetParams    :: NetParams
55 |                                , bThreads      :: Int
56 |                                , bUpdateBlock  :: Int
57 |                                , bSweeps       :: Int
58 |                                }
59 | 
60 | drawStBenchmark :: STBenchmark -> RVar Benchmark
61 | drawStBenchmark b = do
62 |     net <- randomNetwork $ bNetParams b
63 |     init <- randomInitialize net
64 |     let name = printf "%d topics, %d threads, %d block, %d items per node" (nTopics $ bNetParams b) (bThreads b) (bUpdateBlock b) (nItemsPerNode $ bNetParams b)
65 |     return $ bench name $ do
66 |         setNumCapabilities (bThreads b)
67 |         gibbsUpdate (bThreads b) (bUpdateBlock b) (model net init)
68 |             $ concat $ replicate (bSweeps b) (updateUnits net)
69 | 
70 | stBenchmarkParams :: [STBenchmark]
71 | stBenchmarkParams = do
72 |     updateBlock <- [10, 100, 1000]
73 |     topics <- [20, 100, 500]
74 |     threads <- [1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26]
75 |     return STBenchmark { bNetParams = netParams {nTopics=topics}
76 |                        , bThreads = threads
77 |                        , bUpdateBlock = updateBlock
78 |                        , bSweeps = 2
79 |                        }
80 | 
81 | stBenchmarks :: RVar Benchmark
82 | stBenchmarks = bgroup "ST" `fmap` mapM drawStBenchmark stBenchmarkParams
83 | 


--------------------------------------------------------------------------------
/network-topic-models/Benchmark.hs:
--------------------------------------------------------------------------------
 1 | import           Criterion
 2 | import           Criterion.Config
 3 | import           Criterion.Main
 4 | import           Data.Random
 5 | import           System.Random.MWC
 6 | import           BenchLDA
 7 | import           BenchST
 8 | 
 9 | benchmarks :: RVar [Benchmark]
10 | benchmarks = sequence [ldaBenchmarks] --, stBenchmarks]
11 | 
12 | withSystemRandomIO :: (GenIO -> IO a) -> IO a
13 | withSystemRandomIO = withSystemRandom
14 | 
15 | main = do
16 |     bs <- withSystemRandomIO $ runRVar benchmarks
17 |     defaultMainWith defaultConfig (return ()) bs
18 | 


--------------------------------------------------------------------------------
/network-topic-models/DumpCI.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | 
  3 | import           Data.Monoid
  4 | import           Data.Foldable
  5 | import           Data.List
  6 | import           Data.Function (on)
  7 | import           Options.Applicative
  8 | 
  9 | import qualified Data.Map as M
 10 | import qualified Data.Set as S
 11 | import qualified Data.ByteString as BS
 12 | 
 13 | import qualified Data.Text.Lazy.IO as TL
 14 | import qualified Data.Text.Lazy.Builder as TB
 15 | import           Data.Text.Lazy.Builder.Int
 16 | import           Data.Text.Lazy.Builder.RealFloat
 17 | import           Data.Binary
 18 | 
 19 | import           System.FilePath ((</>))
 20 | import           Text.Printf
 21 | 
 22 | import           BayesStack.Models.Topic.CitationInfluence
 23 | import           FormatMultinom
 24 | import           Numeric.Log
 25 | import           ReadData
 26 | import           SerializeText
 27 | 
 28 | data Opts = Opts { nElems   :: Maybe Int
 29 |                  , dumper   :: Dumper
 30 |                  , sweepDir :: FilePath
 31 |                  , sweepNum :: Maybe Int
 32 |                  }
 33 | 
 34 | type Dumper = Opts -> NetData -> MState
 35 |               -> (Item -> TB.Builder) -> (Node -> TB.Builder)
 36 |               -> TB.Builder
 37 | 
 38 | showB :: Show a => a -> TB.Builder
 39 | showB = TB.fromString . show
 40 | 
 41 | showTopic :: Topic -> TB.Builder
 42 | showTopic (Topic n) = "Topic "<>decimal n
 43 | 
 44 | formatProb = formatRealFloat Exponent (Just 3) . realToFrac
 45 | 
 46 | readDumper :: String -> Maybe Dumper
 47 | readDumper "phis"   = Just $ \opts nd m showItem showNode ->
 48 |     formatMultinoms showTopic showItem (nElems opts) (stPhis m)
 49 | 
 50 | readDumper "psis"   = Just $ \opts nd m showItem showNode ->
 51 |     formatMultinoms (\(Citing n)->showNode n) showB (nElems opts) (stPsis m)
 52 | 
 53 | readDumper "lambdas"= Just $ \opts nd m showItem showNode ->
 54 |     formatMultinoms (\(Cited n)->showNode n) showB (nElems opts) (stLambdas m)
 55 | 
 56 | readDumper "omegas" = Just $ \opts nd m showItem showNode ->
 57 |     formatMultinoms (\(Citing n)->showNode n) showB (nElems opts) (stOmegas m)
 58 | 
 59 | readDumper "gammas" = Just $ \opts nd m showItem showNode ->
 60 |     formatMultinoms (\(Citing n)->showNode n) showB (nElems opts) (stGammas m)
 61 | 
 62 | readDumper "influences" = Just $ \opts nd m showItem showNode ->
 63 |     let formatInfluences u =
 64 |             foldMap (\(Cited n,p)->"\t" <> showNode n <> "\t" <> formatProb p <> "\n")
 65 |             $ sortBy (flip (compare `on` snd))
 66 |             $ M.assocs $ influence nd m u
 67 |     in foldMap (\u@(Citing u')->"\n" <> showNode u' <> "\n" <> formatInfluences u)
 68 |        $ M.keys $ stGammas m
 69 | 
 70 | readDumper "edge-mixtures" = Just $ \opts nd m showItem showNode ->
 71 |     let showArc (Arc (Citing d) (Cited c)) = showNode d <> " -> " <> showNode c
 72 |         formatMixture a =
 73 |             let ps = sortBy (flip compare `on` snd)
 74 |                    $ map (\t->(t, arcTopicMixture nd m a t))
 75 |                    $ S.toList $ dTopics nd
 76 |                 norm = Numeric.Log.sum $ map snd ps
 77 |             in foldMap (\(t,p)->"\t" <> showTopic t <> "\t" <> formatProb p <> "\n")
 78 |                $ maybe id take (nElems opts)
 79 |                $ map (\(t,p)->(t, p / norm)) ps
 80 |     in foldMap (\a->"\n" <> showArc a <> "\n" <> formatMixture a)
 81 |        $ S.toList $ dArcs nd
 82 | 
 83 | readDumper _        = Nothing
 84 | 
 85 | opts = Opts
 86 |     <$> nullOption   ( long "top"
 87 |                     <> short 'n'
 88 |                     <> value Nothing
 89 |                     <> reader (pure . auto)
 90 |                     <> metavar "N"
 91 |                     <> help "Number of elements to output from each distribution"
 92 |                      )
 93 |     <*> argument readDumper
 94 |                      ( metavar "STR"
 95 |                     <> help "One of: phis, psis, lambdas, omegas, gammas, influences, edge-mixtures"
 96 |                      )
 97 |     <*> strOption    ( long "sweeps"
 98 |                     <> short 's'
 99 |                     <> value "sweeps"
100 |                     <> metavar "DIR"
101 |                     <> help "The directory of sweeps to dump"
102 |                      )
103 |     <*> option       ( long "sweep-n"
104 |                     <> short 'N'
105 |                     <> reader (pure . auto)
106 |                     <> value Nothing
107 |                     <> metavar "N"
108 |                     <> help "The sweep number to dump"
109 |                      )
110 | 
111 | readSweep :: FilePath -> IO MState
112 | readSweep = decodeFile
113 | 
114 | readNetData :: FilePath -> IO NetData
115 | readNetData = decodeFile
116 | 
117 | main = do
118 |     args <- execParser $ info (helper <*> opts)
119 |          ( fullDesc
120 |         <> progDesc "Dump distributions from an citation influence model sweep"
121 |         <> header "dump-ci - Dump distributions from an citation influence model sweep"
122 |          )
123 | 
124 |     nd <- readNetData $ sweepDir args </> "data"
125 |     itemMap <- readItemMap $ sweepDir args
126 |     nodeMap <- readNodeMap $ sweepDir args
127 |     m <- case sweepNum args of
128 |              Nothing -> readSweep =<< getLastSweep (sweepDir args)
129 |              Just n  -> readSweep $ sweepDir args </> printf "%05d.state" n
130 | 
131 |     let showItem = showB . (itemMap M.!)
132 |         showNode = showB . (nodeMap M.!)
133 |     TL.putStr $ TB.toLazyText $ dumper args args nd m showItem showNode
134 | 


--------------------------------------------------------------------------------
/network-topic-models/DumpCINT.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | 
  3 | import           Control.Lens hiding (argument)
  4 | import           Data.Foldable
  5 | import           Data.Function (on)
  6 | import           Data.List
  7 | import           Data.Monoid
  8 | import           Options.Applicative
  9 | 
 10 | import qualified Data.Map as M
 11 | import qualified Data.Set as S
 12 | import qualified Data.ByteString as BS
 13 | 
 14 | import qualified Data.Text.Lazy.IO as TL
 15 | import qualified Data.Text.Lazy.Builder as TB
 16 | import           Data.Text.Lazy.Builder.Int
 17 | import           Data.Text.Lazy.Builder.RealFloat
 18 | import           Data.Binary
 19 | 
 20 | import           System.FilePath ((</>))
 21 | import           Text.Printf
 22 | 
 23 | import           BayesStack.Models.Topic.CitationInfluenceNoTopics
 24 | import           FormatMultinom
 25 | import           Numeric.Log
 26 | import           ReadData
 27 | import           SerializeText
 28 | 
 29 | data Opts = Opts { nElems   :: Maybe Int
 30 |                  , dumper   :: Dumper
 31 |                  , sweepDir :: FilePath
 32 |                  , sweepNum :: Maybe Int
 33 |                  }
 34 | 
 35 | type Dumper = Opts -> NetData -> MState
 36 |               -> (Item -> TB.Builder) -> (Node -> TB.Builder)
 37 |               -> TB.Builder
 38 | 
 39 | showB :: Show a => a -> TB.Builder
 40 | showB = TB.fromString . show
 41 | 
 42 | showTopic :: Topic -> TB.Builder
 43 | showTopic (Topic n) = "Topic "<>decimal n
 44 | 
 45 | formatProb = formatRealFloat Exponent (Just 3) . realToFrac
 46 | 
 47 | readDumper :: String -> Maybe Dumper
 48 | readDumper "psis"   = Just $ \opts nd m showItem showNode ->
 49 |     formatMultinoms (\(Citing n)->showNode n) (\(Cited n)->showNode n) (nElems opts) (m^.stPsis)
 50 | 
 51 | readDumper "lambdas"= Just $ \opts nd m showItem showNode ->
 52 |     formatMultinoms (\(Cited n)->showNode n) showItem (nElems opts) (m^.stLambdas)
 53 | 
 54 | readDumper "omegas" = Just $ \opts nd m showItem showNode ->
 55 |     formatMultinoms (\(Citing n)->showNode n) showB (nElems opts) (m^.stOmegas)
 56 | 
 57 | readDumper "gammas" = Just $ \opts nd m showItem showNode ->
 58 |     formatMultinoms (\(Citing n)->showNode n) showB (nElems opts) (m^.stGammas)
 59 | 
 60 | readDumper _        = Nothing
 61 | 
 62 | opts = Opts
 63 |     <$> nullOption   ( long "top"
 64 |                     <> short 'n'
 65 |                     <> value Nothing
 66 |                     <> reader (pure . auto)
 67 |                     <> metavar "N"
 68 |                     <> help "Number of elements to output from each distribution"
 69 |                      )
 70 |     <*> argument readDumper
 71 |                      ( metavar "STR"
 72 |                     <> help "One of: phis, psis, lambdas, omegas, gammas, influences, edge-mixtures"
 73 |                      )
 74 |     <*> strOption    ( long "sweeps"
 75 |                     <> short 's'
 76 |                     <> value "sweeps"
 77 |                     <> metavar "DIR"
 78 |                     <> help "The directory of sweeps to dump"
 79 |                      )
 80 |     <*> option       ( long "sweep-n"
 81 |                     <> short 'N'
 82 |                     <> reader (pure . auto)
 83 |                     <> value Nothing
 84 |                     <> metavar "N"
 85 |                     <> help "The sweep number to dump"
 86 |                      )
 87 | 
 88 | readSweep :: FilePath -> IO MState
 89 | readSweep = decodeFile
 90 | 
 91 | readNetData :: FilePath -> IO NetData
 92 | readNetData = decodeFile
 93 | 
 94 | main = do
 95 |     args <- execParser $ info (helper <*> opts)
 96 |          ( fullDesc
 97 |         <> progDesc "Dump distributions from an citation influence model sweep"
 98 |         <> header "dump-ci - Dump distributions from an citation influence model sweep"
 99 |          )
100 | 
101 |     nd <- readNetData $ sweepDir args </> "data"
102 |     itemMap <- readItemMap $ sweepDir args
103 |     nodeMap <- readNodeMap $ sweepDir args
104 |     m <- case sweepNum args of
105 |              Nothing -> readSweep =<< getLastSweep (sweepDir args)
106 |              Just n  -> readSweep $ sweepDir args </> printf "%05d.state" n
107 | 
108 |     let showItem = showB . (itemMap M.!)
109 |         showNode = showB . (nodeMap M.!)
110 |     TL.putStr $ TB.toLazyText $ dumper args args nd m showItem showNode
111 | 


--------------------------------------------------------------------------------
/network-topic-models/DumpLDA.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE OverloadedStrings #-}
 2 | 
 3 | import           Data.Monoid
 4 | import           Options.Applicative
 5 | 
 6 | import qualified Data.Map as M
 7 | import qualified Data.ByteString as BS
 8 | 
 9 | import qualified Data.Text.Lazy.IO as TL
10 | import           Data.Text.Lazy.Builder.Int
11 | import qualified Data.Text.Lazy.Builder as TB
12 | import           Data.Binary
13 | 
14 | import           System.FilePath ((</>))
15 | import           Text.Printf
16 | 
17 | import           BayesStack.Models.Topic.LDA
18 | import           SerializeText
19 | import           ReadData
20 | import           FormatMultinom
21 | 
22 | data Opts = Opts { nElems   :: Maybe Int
23 |                  , dumper   :: Dumper
24 |                  , sweepDir :: FilePath
25 |                  , sweepNum :: Maybe Int
26 |                  }
27 | 
28 | type Dumper = Opts -> NetData -> MState
29 |               -> (Item -> TB.Builder) -> (Node -> TB.Builder)
30 |               -> TB.Builder
31 | 
32 | showB :: Show a => a -> TB.Builder
33 | showB = TB.fromString . show
34 | 
35 | readDumper :: String -> Maybe Dumper
36 | readDumper "thetas" = Just $ \opts nd m showItem showNode ->
37 |     formatMultinoms showNode showB (nElems opts) (stThetas m)
38 | 
39 | readDumper "phis"   = Just $ \opts nd m showItem showNode ->
40 |     formatMultinoms (\(Topic n)->"Topic "<>decimal n) showItem (nElems opts) (stPhis m)
41 | 
42 | readDumper _        = Nothing
43 | 
44 | opts = Opts
45 |     <$> nullOption   ( long "top"
46 |                     <> short 'n'
47 |                     <> value Nothing
48 |                     <> reader (pure . auto)
49 |                     <> metavar "N"
50 |                     <> help "Number of elements to output from each distribution"
51 |                      )
52 |     <*> argument readDumper
53 |                      ( metavar "STR"
54 |                     <> help "One of: thetas, lambdas"
55 |                      )
56 |     <*> strOption    ( long "sweeps"
57 |                     <> short 's'
58 |                     <> value "sweeps"
59 |                     <> metavar "DIR"
60 |                     <> help "The directory of sweeps to dump"
61 |                      )
62 |     <*> option       ( long "number"
63 |                     <> short 'N'
64 |                     <> reader (pure . auto)
65 |                     <> value Nothing
66 |                     <> metavar "N"
67 |                     <> help "The sweep number to dump"
68 |                      )
69 | 
70 | readSweep :: FilePath -> IO MState
71 | readSweep = decodeFile
72 | 
73 | readNetData :: FilePath -> IO NetData
74 | readNetData = decodeFile
75 | 
76 | main = do
77 |     args <- execParser $ info (helper <*> opts)
78 |          ( fullDesc
79 |         <> progDesc "Dump distributions from an LDA sweep"
80 |         <> header "dump-lda - Dump distributions from an LDA sweep"
81 |          )
82 | 
83 |     nd <- readNetData $ sweepDir args </> "data"
84 |     itemMap <- readItemMap $ sweepDir args
85 |     nodeMap <- readNodeMap $ sweepDir args
86 |     m <- case sweepNum args of
87 |              Nothing -> readSweep =<< getLastSweep (sweepDir args)
88 |              Just n  -> readSweep $ sweepDir args </> printf "%05d.state" n
89 | 
90 |     let showItem = showB . (itemMap M.!)
91 |         showNode = showB . (nodeMap M.!)
92 |     TL.putStr $ TB.toLazyText $ dumper args args nd m showItem showNode
93 | 


--------------------------------------------------------------------------------
/network-topic-models/DumpLDARelevance.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE OverloadedStrings #-}
 2 | 
 3 | import           Data.Monoid
 4 | import           Options.Applicative
 5 | 
 6 | import qualified Data.Map as M
 7 | import qualified Data.ByteString as BS
 8 | 
 9 | import qualified Data.Text.Lazy.IO as TL
10 | import           Data.Text.Lazy.Builder.Int
11 | import qualified Data.Text.Lazy.Builder as TB
12 | import           Data.Binary
13 | 
14 | import           System.FilePath ((</>))
15 | import           Text.Printf
16 | 
17 | import           BayesStack.Models.Topic.LDARelevance
18 | import           SerializeText
19 | import           ReadData
20 | import           FormatMultinom
21 | 
22 | data Opts = Opts { nElems   :: Maybe Int
23 |                  , dumper   :: Dumper
24 |                  , sweepDir :: FilePath
25 |                  , sweepNum :: Maybe Int
26 |                  }
27 | 
28 | type Dumper = Opts -> NetData -> MState
29 |               -> (Item -> TB.Builder) -> (Node -> TB.Builder)
30 |               -> TB.Builder
31 | 
32 | showB :: Show a => a -> TB.Builder
33 | showB = TB.fromString . show
34 | 
35 | readDumper :: String -> Maybe Dumper
36 | readDumper "thetas" = Just $ \opts nd m showItem showNode ->
37 |     formatMultinoms showNode showB (nElems opts) (stThetas m)
38 | 
39 | readDumper "phis"   = Just $ \opts nd m showItem showNode ->
40 |     formatMultinoms (\(Topic n)->"Topic "<>decimal n) showItem (nElems opts) (stPhis m)
41 | 
42 | readDumper _        = Nothing
43 | 
44 | opts = Opts
45 |     <$> nullOption   ( long "top"
46 |                     <> short 'n'
47 |                     <> value Nothing
48 |                     <> reader (pure . auto)
49 |                     <> metavar "N"
50 |                     <> help "Number of elements to output from each distribution"
51 |                      )
52 |     <*> argument readDumper
53 |                      ( metavar "STR"
54 |                     <> help "One of: thetas, lambdas"
55 |                      )
56 |     <*> strOption    ( long "sweeps"
57 |                     <> short 's'
58 |                     <> value "sweeps"
59 |                     <> metavar "DIR"
60 |                     <> help "The directory of sweeps to dump"
61 |                      )
62 |     <*> option       ( long "number"
63 |                     <> short 'N'
64 |                     <> reader (pure . auto)
65 |                     <> value Nothing
66 |                     <> metavar "N"
67 |                     <> help "The sweep number to dump"
68 |                      )
69 | 
70 | readSweep :: FilePath -> IO MState
71 | readSweep = decodeFile
72 | 
73 | readNetData :: FilePath -> IO NetData
74 | readNetData = decodeFile
75 | 
76 | main = do
77 |     args <- execParser $ info (helper <*> opts)
78 |          ( fullDesc
79 |         <> progDesc "Dump distributions from an LDA sweep"
80 |         <> header "dump-lda - Dump distributions from an LDA sweep"
81 |          )
82 | 
83 |     nd <- readNetData $ sweepDir args </> "data"
84 |     itemMap <- readItemMap $ sweepDir args
85 |     nodeMap <- readNodeMap $ sweepDir args
86 |     m <- case sweepNum args of
87 |              Nothing -> readSweep =<< getLastSweep (sweepDir args)
88 |              Just n  -> readSweep $ sweepDir args </> printf "%05d.state" n
89 | 
90 |     let showItem = showB . (itemMap M.!)
91 |         showNode = showB . (nodeMap M.!)
92 |     TL.putStr $ TB.toLazyText $ dumper args args nd m showItem showNode
93 | 


--------------------------------------------------------------------------------
/network-topic-models/DumpST.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | 
  3 | import           Data.Monoid
  4 | import           Data.Foldable
  5 | import           Data.List
  6 | import           Data.Function (on)
  7 | import           Options.Applicative
  8 | 
  9 | import qualified Data.Map as M
 10 | import qualified Data.ByteString as BS
 11 | 
 12 | import qualified Data.Text.Lazy.IO as TL
 13 | import qualified Data.Text.Lazy.Builder as TB
 14 | import           Data.Text.Lazy.Builder.Int
 15 | import           Data.Text.Lazy.Builder.RealFloat
 16 | import           Data.Binary
 17 | 
 18 | import           System.FilePath ((</>))
 19 | import           Text.Printf
 20 | 
 21 | import           BayesStack.Models.Topic.SharedTaste
 22 | import           SerializeText
 23 | import           ReadData
 24 | import           FormatMultinom
 25 | 
 26 | data Opts = Opts { nElems   :: Maybe Int
 27 |                  , dumper   :: Dumper
 28 |                  , sweepDir :: FilePath
 29 |                  , sweepNum :: Maybe Int
 30 |                  }
 31 | 
 32 | type Dumper = Opts -> NetData -> MState
 33 |               -> (Item -> TB.Builder) -> (Node -> TB.Builder)
 34 |               -> TB.Builder
 35 | 
 36 | showB :: Show a => a -> TB.Builder
 37 | showB = TB.fromString . show
 38 | 
 39 | readDumper :: String -> Maybe Dumper
 40 | readDumper "phis"   = Just $ \opts nd m showItem showNode ->
 41 |     formatMultinoms (\(Topic n)->"Topic "<>decimal n) showItem (nElems opts) (stPhis m)
 42 | 
 43 | readDumper "psis"   = Just $ \opts nd m showItem showNode ->
 44 |     formatMultinoms showNode showB (nElems opts) (stPsis m)
 45 | 
 46 | readDumper "lambdas"= Just $ \opts nd m showItem showNode ->
 47 |     formatMultinoms showB showB (nElems opts) (stLambdas m)
 48 | 
 49 | readDumper "omegas" = Just $ \opts nd m showItem showNode ->
 50 |     formatMultinoms showB showB (nElems opts) (stOmegas m)
 51 | 
 52 | readDumper "gammas" = Just $ \opts nd m showItem showNode ->
 53 |     formatMultinoms showB showB (nElems opts) (stGammas m)
 54 | 
 55 | readDumper "influences" = Just $ \opts nd m showItem showNode ->
 56 |     let formatProb = formatRealFloat Exponent (Just 3) . realToFrac
 57 |         formatInfluences u =
 58 |             foldMap (\(n,p)->"\t" <> showNode n <> "\t" <> formatProb p <> "\n")
 59 |             $ sortBy (flip (compare `on` snd))
 60 |             $ M.assocs $ influence nd m u
 61 |     in foldMap (\u->"\n" <> showB u <> "\n" <> formatInfluences u)
 62 |        $ M.keys $ stGammas m
 63 | 
 64 | opts = Opts
 65 |     <$> nullOption   ( long "top"
 66 |                     <> short 'n'
 67 |                     <> value Nothing
 68 |                     <> reader (pure . auto)
 69 |                     <> metavar "N"
 70 |                     <> help "Number of elements to output from each distribution"
 71 |                      )
 72 |     <*> argument readDumper
 73 |                      ( metavar "STR"
 74 |                     <> help "One of: phis, psis, lambdas, omegas, gammas, influences"
 75 |                      )
 76 |     <*> strOption    ( long "sweeps"
 77 |                     <> short 's'
 78 |                     <> value "sweeps"
 79 |                     <> metavar "DIR"
 80 |                     <> help "The directory of sweeps to dump"
 81 |                      )
 82 |     <*> option       ( long "number"
 83 |                     <> short 'N'
 84 |                     <> reader (pure . auto)
 85 |                     <> value Nothing
 86 |                     <> metavar "N"
 87 |                     <> help "The sweep number to dump"
 88 |                      )
 89 | 
 90 | readSweep :: FilePath -> IO MState
 91 | readSweep = decodeFile
 92 | 
 93 | readNetData :: FilePath -> IO NetData
 94 | readNetData = decodeFile
 95 | 
 96 | main = do
 97 |     args <- execParser $ info (helper <*> opts)
 98 |          ( fullDesc
 99 |         <> progDesc "Dump distributions from an shared taste model sweep"
100 |         <> header "dump-lda - Dump distributions from an shared taste model sweep"
101 |          )
102 | 
103 |     nd <- readNetData $ sweepDir args </> "data"
104 |     itemMap <- readItemMap $ sweepDir args
105 |     nodeMap <- readNodeMap $ sweepDir args
106 |     m <- case sweepNum args of
107 |              Nothing -> readSweep =<< getLastSweep (sweepDir args)
108 |              Just n  -> readSweep $ sweepDir args </> printf "%05d.state" n
109 | 
110 |     let showItem = showB . (itemMap M.!)
111 |         showNode = showB . (nodeMap M.!)
112 |     TL.putStr $ TB.toLazyText $ dumper args args nd m showItem showNode
113 | 


--------------------------------------------------------------------------------
/network-topic-models/FormatMultinom.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE OverloadedStrings #-}
 2 | 
 3 | module FormatMultinom ( formatMultinom
 4 |                       , formatMultinoms
 5 |                       ) where
 6 | 
 7 | import           Data.Foldable
 8 | import           Data.Monoid
 9 | 
10 | import qualified Data.Text.Lazy.IO as TL
11 | import qualified Data.Text.Lazy.Builder as TB
12 | import           Data.Text.Lazy.Builder.RealFloat
13 | 
14 | import qualified Data.Map as M
15 | 
16 | import           BayesStack.DirMulti
17 | 
18 | formatMultinom :: (Real w, Ord a, Enum a)
19 |                => (a -> TB.Builder) -> Maybe Int -> Multinom w a -> TB.Builder
20 | formatMultinom show n = foldMap formatElem . takeTop . toList . decProbabilities
21 |     where formatElem (p,x) =
22 |                "\t" <> show x <> "\t" <> formatRealFloat Exponent (Just 3) p <> "\n"
23 |           takeTop = maybe id take n
24 | 
25 | formatMultinoms :: (Real w, Ord k, Ord a, Enum a)
26 |                 => (k -> TB.Builder) -> (a -> TB.Builder) -> Maybe Int
27 |                 -> M.Map k (Multinom w a) -> TB.Builder
28 | formatMultinoms showKey showElem n = foldMap go . M.assocs
29 |     where go (k,v) = showKey k <> "\n"
30 |                    <> formatMultinom showElem n v <> "\n"
31 | 


--------------------------------------------------------------------------------
/network-topic-models/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2012, Ben Gamari
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 |     * Redistributions of source code must retain the above copyright
 9 |       notice, this list of conditions and the following disclaimer.
10 | 
11 |     * Redistributions in binary form must reproduce the above
12 |       copyright notice, this list of conditions and the following
13 |       disclaimer in the documentation and/or other materials provided
14 |       with the distribution.
15 | 
16 |     * Neither the name of Ben Gamari nor the names of other
17 |       contributors may be used to endorse or promote products derived
18 |       from this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | 


--------------------------------------------------------------------------------
/network-topic-models/README.mkd:
--------------------------------------------------------------------------------
 1 | # Network topic models for `bayes-stack`
 2 | 
 3 | `network-topic-models` implements a few network topic models on top of
 4 | `bayes-stack`. These include,
 5 | 
 6 |   * LDA [1]
 7 |   * Shared taste model [2]
 8 |   * Citation influence model [3]
 9 |   
10 | We adopt the following nomenclature,
11 | 
12 |   * Node: a document or user that contains items
13 |   * Item: a word, tag, or other content of a node
14 |   * Edge: a set of two nodes (undirected edge)
15 |   * Arc:  a pair of two nodes (directed edge)
16 | 
17 | See `doc/models/models.tex` for graphical representation of the
18 | implemented models.
19 | 
20 | ## References
21 | 
22 |  1. D. Blei, A. Ng, and M. Jordan. "Latent Dirichlet allocation." _Journal of Machine Learning Research_, 3:993–1022, 2003.
23 |  2. L. Dietz, B. Gamari, J. Guiver, E. Snelson, and R. Herbrich. "De-layering social networks by shared tastes of friendships." _Proceedings of the Sixths International AAAI Conference on Weblogs and Social Media_, 2012.
24 |  3. L. Dietz, S. Bickel, and T. Scheffer. "Unsupervised prediction of citation influences." _Proceedings of the 24th international conference on machine learning_, 2007.
25 | 
26 | 


--------------------------------------------------------------------------------
/network-topic-models/ReadData.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE PatternGuards #-}
 2 | 
 3 | module ReadData ( Term, NodeName
 4 |                 , readEdges
 5 |                 , readNodeItems
 6 |                 , getLastSweep
 7 |                 , readItemMap, readNodeMap
 8 |                 ) where
 9 | 
10 | import           BayesStack.Models.Topic.Types
11 | import           BayesStack.Models.Topic.CitationInfluence
12 | 
13 | import qualified Data.Set as S
14 | import           Data.Set (Set)
15 | 
16 | import qualified Data.Map as M
17 | 
18 | import           Data.Maybe (mapMaybe)
19 | import           Control.Applicative
20 | 
21 | import           Data.Char (isAlpha)
22 | import qualified Data.Text as T
23 | import qualified Data.Text.IO as TIO
24 | import           Data.Text.Read (decimal)
25 | import           Data.Binary
26 | import qualified Data.ByteString as BS
27 | import           SerializeText ()
28 | 
29 | import           System.FilePath ((</>))
30 | import           System.Directory
31 | import           Data.List
32 | 
33 | type Term = T.Text
34 | type NodeName = T.Text
35 | 
36 | readEdges :: FilePath -> IO (Set (NodeName, NodeName))
37 | readEdges fname =
38 |     S.fromList . mapMaybe parseLine . T.lines <$> TIO.readFile fname
39 |     where parseLine :: T.Text -> Maybe (NodeName, NodeName)
40 |           parseLine l = case T.words l of
41 |              [a,b]     -> Just (a, b)
42 |              otherwise -> Nothing
43 | 
44 | readNodeItems :: Set Term -> FilePath -> IO (M.Map NodeName [Term])
45 | readNodeItems stopWords fname =
46 |     M.unionsWith (++) . map parseLine . T.lines <$> TIO.readFile fname
47 |     where parseLine :: T.Text -> M.Map NodeName [Term]
48 |           parseLine l = case T.words l of
49 |              n:words ->
50 |                  M.singleton n
51 |                  $ filter (`S.notMember` stopWords) words
52 |              otherwise -> M.empty
53 | 
54 | getLastSweep :: FilePath -> IO FilePath
55 | getLastSweep sweepsDir =
56 |     (sweepsDir </>) . last . sort . filter (".state" `isSuffixOf`)
57 |     <$> getDirectoryContents sweepsDir
58 | 
59 | readItemMap :: FilePath -> IO (M.Map Item Term)
60 | readItemMap sweepsDir =
61 |     decodeFile $ sweepsDir </> "item-map"
62 | 
63 | readNodeMap :: FilePath -> IO (M.Map Node NodeName)
64 | readNodeMap sweepsDir =
65 |     decodeFile $ sweepsDir </> "node-map"
66 | 


--------------------------------------------------------------------------------
/network-topic-models/ReadRelevanceData.hs:
--------------------------------------------------------------------------------
 1 | module ReadRelevanceData where
 2 | 
 3 | import           Control.Applicative
 4 | import qualified Data.Map.Strict as M
 5 | import           Data.Set (Set)
 6 | import qualified Data.Text as T
 7 | import qualified Data.Text.IO as TIO
 8 | 
 9 | import           BayesStack.Models.Topic.LDARelevance
10 | import           ReadData hiding (readNodeItems)
11 | import           Data.Attoparsec.Text hiding (option)
12 | 
13 | readNodeItems :: Set Term -> FilePath
14 |               -> IO (Either String (M.Map NodeName [(Term, ItemWeight)]))
15 | readNodeItems stopWords fname =
16 |     fmap (M.unionsWith (++)) . parseOnly (many1 line) <$> TIO.readFile fname
17 |     where line = do doc <- takeTill isHorizontalSpace
18 |                     words <- flip manyTill endOfLine $ do
19 |                         skipSpace
20 |                         word <- takeTill isHorizontalSpace
21 |                         skipSpace
22 |                         weight <- double
23 |                         return (word, realToFrac weight)
24 |                     return $ M.singleton doc words
25 | 


--------------------------------------------------------------------------------
/network-topic-models/RunCI.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns, GeneralizedNewtypeDeriving, StandaloneDeriving #-}
  2 | 
  3 | import           Prelude hiding (mapM)
  4 | 
  5 | import           Options.Applicative
  6 | import           Data.Monoid ((<>))
  7 | import           Control.Monad.Trans.Class
  8 | 
  9 | import           Data.Vector (Vector)
 10 | import qualified Data.Vector.Unboxed as VU
 11 | import qualified Data.Vector.Generic as V
 12 | import           Statistics.Sample (mean)
 13 | 
 14 | import           Data.Traversable (mapM)
 15 | import qualified Data.Set as S
 16 | import           Data.Set (Set)
 17 | import qualified Data.Map as M
 18 | 
 19 | import           ReadData
 20 | import           SerializeText
 21 | import qualified RunSampler as Sampler
 22 | import           BayesStack.DirMulti
 23 | import           BayesStack.Models.Topic.CitationInfluence
 24 | import           BayesStack.UniqueKey
 25 | 
 26 | import qualified Data.Text as T
 27 | import qualified Data.Text.IO as TIO
 28 | 
 29 | import           System.FilePath.Posix ((</>))
 30 | import           Data.Binary
 31 | import qualified Data.ByteString as BS
 32 | import           Text.Printf
 33 | 
 34 | import           Data.Random
 35 | import           System.Random.MWC
 36 | 
 37 | data RunOpts = RunOpts { arcsFile        :: FilePath
 38 |                        , nodesFile       :: FilePath
 39 |                        , stopwords       :: Maybe FilePath
 40 |                        , nTopics         :: Int
 41 |                        , samplerOpts     :: Sampler.SamplerOpts
 42 |                        , hyperParams     :: HyperParams
 43 |                        , noClean         :: Bool
 44 |                        }
 45 | 
 46 | runOpts = RunOpts
 47 |     <$> strOption  ( long "arcs"
 48 |                   <> short 'a'
 49 |                   <> metavar "FILE"
 50 |                   <> help "File containing arcs"
 51 |                    )
 52 |     <*> strOption  ( long "nodes"
 53 |                   <> short 'n'
 54 |                   <> metavar "FILE"
 55 |                   <> help "File containing nodes' items"
 56 |                    )
 57 |     <*> nullOption ( long "stopwords"
 58 |                   <> short 's'
 59 |                   <> metavar "FILE"
 60 |                   <> reader (pure . Just)
 61 |                   <> value Nothing
 62 |                   <> help "Stop words list"
 63 |                    )
 64 |     <*> option     ( long "topics"
 65 |                   <> short 't'
 66 |                   <> metavar "N"
 67 |                   <> value 20
 68 |                   <> help "Number of topics"
 69 |                    )
 70 |     <*> Sampler.samplerOpts
 71 |     <*> hyperOpts
 72 |     <*> flag False True ( long "no-clean"
 73 |                        <> short 'c'
 74 |                        <> help "Don't attempt to sanitize input data. Among other things, nodes without friends will not be discarded"
 75 |                         )
 76 | 
 77 | hyperOpts = HyperParams
 78 |     <$> option     ( long "prior-psi"
 79 |                   <> value 1
 80 |                   <> help "Dirichlet parameter for prior on psi"
 81 |                    )
 82 |     <*> option     ( long "prior-lambda"
 83 |                   <> value 0.1
 84 |                   <> help "Dirichlet parameter for prior on lambda"
 85 |                    )
 86 |     <*> option     ( long "prior-phi"
 87 |                   <> value 0.01
 88 |                   <> help "Dirichlet parameter for prior on phi"
 89 |                    )
 90 |     <*> option     ( long "prior-omega"
 91 |                   <> value 0.01
 92 |                   <> help "Dirichlet parameter for prior on omega"
 93 |                    )
 94 |     <*> option     ( long "prior-gamma-shared"
 95 |                   <> value 0.9
 96 |                   <> help "Beta parameter for prior on gamma (shared)"
 97 |                    )
 98 |     <*> option     ( long "prior-gamma-own"
 99 |                   <> value 0.1
100 |                   <> help "Beta parameter for prior on gamma (own)"
101 |                    )
102 | 
103 | mapMKeys :: (Ord k, Ord k', Monad m, Applicative m)
104 |          => (a -> m a') -> (k -> m k') -> M.Map k a -> m (M.Map k' a')
105 | mapMKeys f g x = M.fromList <$> (mapM (\(k,v)->(,) <$> g k <*> f v) $ M.assocs x)
106 | 
107 | termsToItems :: M.Map NodeName [Term] -> Set (NodeName, NodeName)
108 |              -> ( (M.Map Node [Item], Set (Node, Node))
109 |                 , (M.Map Item Term, M.Map Node NodeName))
110 | termsToItems nodes arcs =
111 |     let ((d', nodeMap), itemMap) =
112 |             runUniqueKey' [Item i | i <- [0..]] $
113 |             runUniqueKeyT' [Node i | i <- [0..]] $ do
114 |                 a <- mapMKeys (mapM (lift . getUniqueKey)) getUniqueKey nodes
115 |                 b <- S.fromList <$> mapM (\(x,y)->(,) <$> getUniqueKey x <*> getUniqueKey y)
116 |                      (S.toList arcs)
117 |                 return (a,b)
118 |     in (d', (itemMap, nodeMap))
119 | 
120 | makeNetData :: HyperParams -> M.Map Node [Item] -> Set Arc -> Int -> NetData
121 | makeNetData hp nodeItems arcs nTopics =
122 |     netData hp arcs nodeItems' topics
123 |   where topics = S.fromList [Topic i | i <- [1..nTopics]]
124 |         nodeItems' = M.fromList
125 |                      $ zip [NodeItem i | i <- [0..]]
126 |                      $ do (n,items) <- M.assocs nodeItems
127 |                           item <- items
128 |                           return (n, item)
129 | 
130 | opts = info runOpts
131 |            (  fullDesc
132 |            <> progDesc "Learn citation influence model"
133 |            <> header "run-ci - learn citation influence model"
134 |            )
135 | 
136 | edgesToArcs :: Set (Node, Node) -> Set Arc
137 | edgesToArcs = S.map (\(a,b)->Arc (Citing a) (Cited b))
138 | 
139 | instance Sampler.SamplerModel MState where
140 |     estimateHypers = id -- reestimate -- FIXME
141 |     modelLikelihood = modelLikelihood
142 |     summarizeHypers ms =  "" -- FIXME
143 | 
144 | main = do
145 |     args <- execParser opts
146 |     stopWords <- case stopwords args of
147 |                      Just f  -> S.fromList . T.words <$> TIO.readFile f
148 |                      Nothing -> return S.empty
149 |     printf "Read %d stopwords\n" (S.size stopWords)
150 | 
151 |     ((nodeItems, a), (itemMap, nodeMap)) <- termsToItems
152 |                             <$> readNodeItems stopWords (nodesFile args)
153 |                             <*> readEdges (arcsFile args)
154 |     let arcs = edgesToArcs a
155 | 
156 |     Sampler.createSweeps $ samplerOpts args
157 |     let sweepsDir = Sampler.sweepsDir $ samplerOpts args
158 |     encodeFile (sweepsDir </> "item-map") itemMap
159 |     encodeFile (sweepsDir </> "node-map") nodeMap
160 | 
161 |     let termCounts = V.fromListN (M.size nodeItems)
162 |                      $ map length $ M.elems nodeItems :: Vector Int
163 |     printf "Read %d arcs, %d nodes, %d node-items\n" (S.size arcs) (M.size nodeItems) (V.sum termCounts)
164 |     printf "Mean items per node:  %1.2f\n" (mean $ V.map realToFrac termCounts)
165 | 
166 |     withSystemRandom $ \mwc->do
167 |     let nd = (if noClean args then id else cleanNetData)
168 |              $ makeNetData (hyperParams args) nodeItems arcs (nTopics args)
169 |     mapM_ putStrLn $ verifyNetData (\n->maybe (show n) show $ M.lookup n nodeMap) nd
170 | 
171 |     let nCitingNodes = VU.fromList $ M.elems $ M.unionsWith (+)
172 |                        $ map (\a->M.singleton (citingNode a) 1)
173 |                        $ S.toList $ dArcs nd
174 | 
175 |         nCitedNodes  = VU.fromList $ M.elems $ M.unionsWith (+)
176 |                        $ map (\a->M.singleton (citedNode a) 1)
177 |                        $ S.toList $ dArcs nd
178 |     printf "After cleaning: %d cited nodes, %d citing nodes, %d arcs, %d node-items\n"
179 |            (S.size $ S.map citedNode $ dArcs nd) (S.size $ S.map citingNode $ dArcs nd)
180 |            (S.size $ dArcs nd) (M.size $ dNodeItems nd)
181 |     printf "In degree:  mean=%3.1f, maximum=%3.1f\n"
182 |            (mean nCitedNodes) (V.maximum nCitedNodes)
183 |     printf "Out degree: mean=%3.1f, maximum=%3.1f\n"
184 |            (mean nCitingNodes) (V.maximum nCitingNodes)
185 | 
186 |     encodeFile (sweepsDir </> "data") nd
187 |     mInit <- runRVar (randomInitialize nd) mwc
188 |     let m = model nd mInit
189 |     Sampler.runSampler (samplerOpts args) m (updateUnits nd)
190 |     return ()
191 | 


--------------------------------------------------------------------------------
/network-topic-models/RunCINT.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns, GeneralizedNewtypeDeriving, StandaloneDeriving #-}
  2 | 
  3 | import           Prelude hiding (mapM)
  4 | import           Control.Lens                 
  5 | 
  6 | import           Options.Applicative
  7 | import           Data.Monoid ((<>))
  8 | import           Control.Monad.Trans.Class
  9 | 
 10 | import           Data.Vector (Vector)
 11 | import qualified Data.Vector.Unboxed as VU
 12 | import qualified Data.Vector.Generic as V
 13 | import           Statistics.Sample (mean)
 14 | 
 15 | import           Data.Traversable (mapM)
 16 | import qualified Data.Set as S
 17 | import           Data.Set (Set)
 18 | import qualified Data.Map as M
 19 | 
 20 | import           ReadData
 21 | import           SerializeText
 22 | import qualified RunSampler as Sampler
 23 | import           BayesStack.DirMulti
 24 | import           BayesStack.Models.Topic.CitationInfluenceNoTopics
 25 | import           BayesStack.UniqueKey
 26 | 
 27 | import qualified Data.Text as T
 28 | import qualified Data.Text.IO as TIO
 29 | 
 30 | import           System.FilePath.Posix ((</>))
 31 | import           Data.Binary
 32 | import qualified Data.ByteString as BS
 33 | import           Text.Printf
 34 | 
 35 | import           Data.Random
 36 | import           System.Random.MWC
 37 | 
 38 | data RunOpts = RunOpts { arcsFile        :: FilePath
 39 |                        , nodesFile       :: FilePath
 40 |                        , stopwords       :: Maybe FilePath
 41 |                        , samplerOpts     :: Sampler.SamplerOpts
 42 |                        , hyperParams     :: HyperParams
 43 |                        , noClean         :: Bool
 44 |                        }
 45 | 
 46 | runOpts = RunOpts
 47 |     <$> strOption  ( long "arcs"
 48 |                   <> short 'a'
 49 |                   <> metavar "FILE"
 50 |                   <> help "File containing arcs"
 51 |                    )
 52 |     <*> strOption  ( long "nodes"
 53 |                   <> short 'n'
 54 |                   <> metavar "FILE"
 55 |                   <> help "File containing nodes' items"
 56 |                    )
 57 |     <*> nullOption ( long "stopwords"
 58 |                   <> short 's'
 59 |                   <> metavar "FILE"
 60 |                   <> reader (pure . Just)
 61 |                   <> value Nothing
 62 |                   <> help "Stop words list"
 63 |                    )
 64 |     <*> Sampler.samplerOpts
 65 |     <*> hyperOpts
 66 |     <*> flag False True ( long "no-clean"
 67 |                        <> short 'c'
 68 |                        <> help "Don't attempt to sanitize input data. Among other things, nodes without friends will not be discarded"
 69 |                         )
 70 | 
 71 | hyperOpts = HyperParams
 72 |     <$> option     ( long "prior-psi"
 73 |                   <> value 1
 74 |                   <> help "Dirichlet parameter for prior on psi"
 75 |                    )
 76 |     <*> option     ( long "prior-lambda"
 77 |                   <> value 0.01
 78 |                   <> help "Dirichlet parameter for prior on lambda"
 79 |                    )
 80 |     <*> option     ( long "prior-omega"
 81 |                   <> value 0.01
 82 |                   <> help "Dirichlet parameter for prior on omega"
 83 |                    )
 84 |     <*> option     ( long "prior-gamma-shared"
 85 |                   <> value 0.9
 86 |                   <> help "Beta parameter for prior on gamma (shared)"
 87 |                    )
 88 |     <*> option     ( long "prior-gamma-own"
 89 |                   <> value 0.1
 90 |                   <> help "Beta parameter for prior on gamma (own)"
 91 |                    )
 92 |     <*> option     ( long "prior-beta-fg"
 93 |                   <> value 0.1
 94 |                   <> help "Beta parameter for prior on language background model (foreground)"
 95 |                    )
 96 |     <*> option     ( long "prior-beta-bg"
 97 |                   <> value 0.1
 98 |                   <> help "Beta parameter for prior on language background model (background)"
 99 |                    )
100 | 
101 | mapMKeys :: (Ord k, Ord k', Monad m, Applicative m)
102 |          => (a -> m a') -> (k -> m k') -> M.Map k a -> m (M.Map k' a')
103 | mapMKeys f g x = M.fromList <$> (mapM (\(k,v)->(,) <$> g k <*> f v) $ M.assocs x)
104 | 
105 | termsToItems :: M.Map NodeName [Term] -> Set (NodeName, NodeName)
106 |              -> ( (M.Map Node [Item], Set (Node, Node))
107 |                 , (M.Map Item Term, M.Map Node NodeName))
108 | termsToItems nodes arcs =
109 |     let ((d', nodeMap), itemMap) =
110 |             runUniqueKey' [Item i | i <- [0..]] $
111 |             runUniqueKeyT' [Node i | i <- [0..]] $ do
112 |                 a <- mapMKeys (mapM (lift . getUniqueKey)) getUniqueKey nodes
113 |                 b <- S.fromList <$> mapM (\(x,y)->(,) <$> getUniqueKey x <*> getUniqueKey y)
114 |                      (S.toList arcs)
115 |                 return (a,b)
116 |     in (d', (itemMap, nodeMap))
117 | 
118 | makeNetData :: HyperParams -> M.Map Node [Item] -> Set Arc -> NetData
119 | makeNetData hp nodeItems arcs =
120 |     netData hp arcs items nodeItems'
121 |   where items = M.unions $ concatMap (map $ flip M.singleton 1) $ M.elems nodeItems
122 |         nodeItems' = M.fromList
123 |                     $ zip [NodeItem i | i <- [0..]]
124 |                     $ do (n,items) <- M.assocs nodeItems
125 |                          item <- items
126 |                          return (n, item)
127 | 
128 | opts = info runOpts
129 |            (  fullDesc
130 |            <> progDesc "Learn citation influence model"
131 |            <> header "run-ci - learn citation influence model"
132 |            )
133 | 
134 | edgesToArcs :: Set (Node, Node) -> Set Arc
135 | edgesToArcs = S.map (\(a,b)->Arc (Citing a) (Cited b))
136 | 
137 | instance Sampler.SamplerModel MState where
138 |     estimateHypers = id -- reestimate -- FIXME
139 |     modelLikelihood = modelLikelihood
140 |     summarizeHypers ms =  "" -- FIXME
141 | 
142 | main = do
143 |     args <- execParser opts
144 |     stopWords <- case stopwords args of
145 |                      Just f  -> S.fromList . T.words <$> TIO.readFile f
146 |                      Nothing -> return S.empty
147 |     printf "Read %d stopwords\n" (S.size stopWords)
148 | 
149 |     ((nodeItems, a), (itemMap, nodeMap)) <- termsToItems
150 |                             <$> readNodeItems stopWords (nodesFile args)
151 |                             <*> readEdges (arcsFile args)
152 |     let arcs = edgesToArcs a
153 | 
154 |     Sampler.createSweeps $ samplerOpts args
155 |     let sweepsDir = Sampler.sweepsDir $ samplerOpts args
156 |     encodeFile (sweepsDir </> "item-map") itemMap
157 |     encodeFile (sweepsDir </> "node-map") nodeMap
158 | 
159 |     let termCounts = V.fromListN (M.size nodeItems)
160 |                      $ map length $ M.elems nodeItems :: Vector Int
161 |     printf "Read %d arcs, %d nodes, %d node-items\n" (S.size arcs) (M.size nodeItems) (V.sum termCounts)
162 |     printf "Mean items per node:  %1.2f\n" (mean $ V.map realToFrac termCounts)
163 | 
164 |     withSystemRandom $ \mwc->do
165 |     let nd = (if noClean args then id else cleanNetData)
166 |              $ makeNetData (hyperParams args) nodeItems arcs 
167 |     mapM_ putStrLn $ verifyNetData (\n->maybe (show n) show $ M.lookup n nodeMap) nd
168 | 
169 |     let nCitingNodes = VU.fromList $ M.elems $ M.unionsWith (+)
170 |                        $ map (\a->M.singleton (citingNode a) 1)
171 |                        $ S.toList $ nd^.dArcs
172 | 
173 |         nCitedNodes  = VU.fromList $ M.elems $ M.unionsWith (+)
174 |                        $ map (\a->M.singleton (citedNode a) 1)
175 |                        $ S.toList $ nd^.dArcs
176 |     printf "After cleaning: %d cited nodes, %d citing nodes, %d arcs, %d node-items\n"
177 |            (S.size $ S.map citedNode $ nd^.dArcs) (S.size $ S.map citingNode $ nd^.dArcs)
178 |            (nd ^. dArcs . to S.size) (nd ^. dNodeItems . to M.size)
179 |     printf "In degree:  mean=%3.1f, maximum=%3.1f\n"
180 |            (mean nCitedNodes) (V.maximum nCitedNodes)
181 |     printf "Out degree: mean=%3.1f, maximum=%3.1f\n"
182 |            (mean nCitingNodes) (V.maximum nCitingNodes)
183 | 
184 |     encodeFile (sweepsDir </> "data") nd
185 |     mInit <- runRVar (randomInitialize nd) mwc
186 |     let m = model nd mInit
187 |     Sampler.runSampler (samplerOpts args) m (updateUnits nd)
188 |     return ()
189 | 


--------------------------------------------------------------------------------
/network-topic-models/RunLDA.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns, GeneralizedNewtypeDeriving, StandaloneDeriving #-}
  2 | 
  3 | import           Prelude hiding (mapM)
  4 | 
  5 | import           Options.Applicative
  6 | import           Data.Monoid ((<>))
  7 | import           Control.Monad.Trans.Class
  8 | 
  9 | import           Data.Vector (Vector)
 10 | import qualified Data.Vector.Generic as V
 11 | import           Statistics.Sample (mean)
 12 | 
 13 | import           Data.Traversable (mapM)
 14 | import qualified Data.Set as S
 15 | import           Data.Set (Set)
 16 | import qualified Data.Map.Strict as M
 17 | 
 18 | import           ReadData
 19 | import           SerializeText
 20 | import qualified RunSampler as Sampler
 21 | import           BayesStack.DirMulti
 22 | import           BayesStack.Models.Topic.LDA
 23 | import           BayesStack.UniqueKey
 24 | 
 25 | import qualified Data.Text as T
 26 | import qualified Data.Text.IO as TIO
 27 | 
 28 | import           System.FilePath.Posix ((</>))
 29 | import           Data.Binary
 30 | import qualified Data.ByteString as BS
 31 | import           Text.Printf
 32 | 
 33 | import           Data.Random
 34 | import           System.Random.MWC
 35 | 
 36 | data RunOpts = RunOpts { nodesFile       :: FilePath
 37 |                        , stopwords       :: Maybe FilePath
 38 |                        , nTopics         :: Int
 39 |                        , samplerOpts     :: Sampler.SamplerOpts
 40 |                        , hyperParams     :: HyperParams
 41 |                        }
 42 | 
 43 | runOpts :: Parser RunOpts
 44 | runOpts = RunOpts
 45 |     <$> strOption  ( long "nodes"
 46 |                   <> short 'n'
 47 |                   <> metavar "FILE"
 48 |                   <> help "File containing nodes and their associated items"
 49 |                    )
 50 |     <*> nullOption ( long "stopwords"
 51 |                   <> short 's'
 52 |                   <> metavar "FILE"
 53 |                   <> reader (pure . Just)
 54 |                   <> value Nothing
 55 |                   <> help "Stop word list"
 56 |                    )
 57 |     <*> option     ( long "topics"
 58 |                   <> short 't'
 59 |                   <> metavar "N"
 60 |                   <> value 20
 61 |                   <> help "Number of topics"
 62 |                    )
 63 |     <*> Sampler.samplerOpts
 64 |     <*> hyperOpts
 65 | 
 66 | hyperOpts = HyperParams
 67 |     <$> option     ( long "prior-theta"
 68 |                   <> value 1
 69 |                   <> help "Dirichlet parameter for prior on theta"
 70 |                    )
 71 |     <*> option     ( long "prior-phi"
 72 |                   <> value 0.1
 73 |                   <> help "Dirichlet parameter for prior on phi"
 74 |                    )
 75 | 
 76 | mapMKeys :: (Ord k, Ord k', Monad m, Applicative m)
 77 |          => (a -> m a') -> (k -> m k') -> M.Map k a -> m (M.Map k' a')
 78 | mapMKeys f g x = M.fromList <$> (mapM (\(k,v)->(,) <$> g k <*> f v) $ M.assocs x)
 79 | 
 80 | termsToItems :: M.Map NodeName [Term]
 81 |              -> (M.Map Node [Item], (M.Map Item Term, M.Map Node NodeName))
 82 | termsToItems nodes =
 83 |     let ((d', nodeMap), itemMap) =
 84 |             runUniqueKey' [Item i | i <- [0..]] $
 85 |             runUniqueKeyT' [Node i | i <- [0..]] $ do
 86 |                 mapMKeys (mapM (lift . getUniqueKey)) getUniqueKey nodes
 87 |     in (d', (itemMap, nodeMap))
 88 | 
 89 | netData :: HyperParams -> M.Map Node [Item] -> Int -> NetData
 90 | netData hp nodeItems nTopics =
 91 |     NetData { dHypers           = hp
 92 |             , dItems            = S.unions $ map S.fromList $ M.elems nodeItems
 93 |             , dTopics           = S.fromList [Topic i | i <- [1..nTopics]]
 94 |             , dNodeItems        = M.fromList
 95 |                                   $ zip [NodeItem i | i <- [0..]]
 96 |                                   $ do (n,items) <- M.assocs nodeItems
 97 |                                        item <- items
 98 |                                        return (n, item)
 99 |             , dNodes            = M.keysSet nodeItems
100 |             }
101 | 
102 | opts :: ParserInfo RunOpts
103 | opts = info runOpts (  fullDesc
104 |                     <> progDesc "Learn LDA model"
105 |                     <> header "run-lda - learn LDA model"
106 |                     )
107 | 
108 | instance Sampler.SamplerModel MState where
109 |     estimateHypers = reestimate
110 |     modelLikelihood = modelLikelihood
111 |     summarizeHypers ms =
112 |         "  phi  : "++show (dmAlpha $ snd $ M.findMin $ stPhis ms)++"\n"++
113 |         "  theta: "++show (dmAlpha $ snd $ M.findMin $ stThetas ms)++"\n"
114 | 
115 | main :: IO ()
116 | main = do
117 |     args <- execParser opts
118 |     stopWords <- case stopwords args of
119 |                      Just f  -> S.fromList . T.words <$> TIO.readFile f
120 |                      Nothing -> return S.empty
121 |     printf "Read %d stopwords\n" (S.size stopWords)
122 | 
123 |     (nodeItems, (itemMap, nodeMap)) <- termsToItems
124 |                             <$> readNodeItems stopWords (nodesFile args)
125 | 
126 |     Sampler.createSweeps $ samplerOpts args
127 |     let sweepsDir = Sampler.sweepsDir $ samplerOpts args
128 |     encodeFile (sweepsDir </> "item-map") itemMap
129 |     encodeFile (sweepsDir </> "node-map") nodeMap
130 | 
131 |     let termCounts = V.fromListN (M.size nodeItems)
132 |                      $ map length $ M.elems nodeItems :: Vector Int
133 |     printf "Read %d nodes\n" (M.size nodeItems)
134 |     printf "Mean items per node:  %1.2f\n" (mean $ V.map realToFrac termCounts)
135 | 
136 |     withSystemRandom $ \mwc->do
137 |     let nd = netData (hyperParams args) nodeItems (nTopics args)
138 |     encodeFile (sweepsDir </> "data") nd
139 |     mInit <- runRVar (randomInitialize nd) mwc
140 |     let m = model nd mInit
141 |     Sampler.runSampler (samplerOpts args) m (updateUnits nd)
142 |     return ()
143 | 


--------------------------------------------------------------------------------
/network-topic-models/RunLDARelevance.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns, GeneralizedNewtypeDeriving, StandaloneDeriving, TupleSections #-}
  2 | 
  3 | import           Prelude hiding (mapM)
  4 | 
  5 | import           Options.Applicative
  6 | import           Data.Monoid ((<>))
  7 | import           Control.Monad.Trans.Class
  8 | 
  9 | import           Data.Vector (Vector)
 10 | import qualified Data.Vector.Generic as V
 11 | import           Statistics.Sample (mean)
 12 | 
 13 | import           Data.Traversable (mapM)
 14 | import qualified Data.Set as S
 15 | import           Data.Set (Set)
 16 | import qualified Data.Map.Strict as M
 17 | 
 18 | import qualified Data.Text as T
 19 | import qualified Data.Text.IO as TIO
 20 | import           ReadData hiding (readNodeItems)
 21 | import           ReadRelevanceData (readNodeItems)
 22 | import           SerializeText
 23 | import qualified RunSampler as Sampler
 24 | import           BayesStack.DirMulti
 25 | import           BayesStack.Models.Topic.LDARelevance
 26 | import           BayesStack.UniqueKey
 27 | 
 28 | import           System.Directory (createDirectoryIfMissing)
 29 | import           System.FilePath.Posix ((</>))
 30 | import           Data.Binary
 31 | import qualified Data.ByteString as BS
 32 | import           Text.Printf
 33 | 
 34 | import           Data.Random
 35 | import           System.Random.MWC
 36 | 
 37 | data RunOpts = RunOpts { nodesFile       :: FilePath
 38 |                        , stopwords       :: Maybe FilePath
 39 |                        , nTopics         :: Int
 40 |                        , samplerOpts     :: Sampler.SamplerOpts
 41 |                        , hyperParams     :: HyperParams
 42 |                        }
 43 | 
 44 | runOpts :: Parser RunOpts
 45 | runOpts = RunOpts
 46 |     <$> strOption  ( long "nodes"
 47 |                   <> short 'n'
 48 |                   <> metavar "FILE"
 49 |                   <> help "File containing nodes and their associated items"
 50 |                    )
 51 |     <*> nullOption ( long "stopwords"
 52 |                   <> short 's'
 53 |                   <> metavar "FILE"
 54 |                   <> reader (pure . Just)
 55 |                   <> value Nothing
 56 |                   <> help "Stop word list"
 57 |                    )
 58 |     <*> option     ( long "topics"
 59 |                   <> short 't'
 60 |                   <> metavar "N"
 61 |                   <> value 20
 62 |                   <> help "Number of topics"
 63 |                    )
 64 |     <*> Sampler.samplerOpts
 65 |     <*> hyperOpts
 66 | 
 67 | hyperOpts = HyperParams
 68 |     <$> option     ( long "prior-theta"
 69 |                   <> value 1
 70 |                   <> help "Dirichlet parameter for prior on theta"
 71 |                    )
 72 |     <*> option     ( long "prior-phi"
 73 |                   <> value 0.1
 74 |                   <> help "Dirichlet parameter for prior on phi"
 75 |                    )
 76 | 
 77 | mapMKeys :: (Ord k, Ord k', Monad m, Applicative m)
 78 |          => (a -> m a') -> (k -> m k') -> M.Map k a -> m (M.Map k' a')
 79 | mapMKeys f g x = M.fromList <$> (mapM (\(k,v)->(,) <$> g k <*> f v) $ M.assocs x)
 80 | 
 81 | termsToItems :: M.Map NodeName [(Term, ItemWeight)]
 82 |              -> (M.Map Node [(Item, ItemWeight)], (M.Map Item Term, M.Map Node NodeName))
 83 | termsToItems nodes =
 84 |     let ((d', nodeMap), itemMap) =
 85 |             runUniqueKey' [Item i | i <- [0..]] $
 86 |             runUniqueKeyT' [Node i | i <- [0..]] $ do
 87 |                 mapMKeys (mapM (\(x,w)->(,w) `fmap` lift (getUniqueKey x))) getUniqueKey nodes
 88 |     in (d', (itemMap, nodeMap))
 89 | 
 90 | netData :: HyperParams -> M.Map Node [(Item,ItemWeight)] -> Int -> NetData
 91 | netData hp nodeItems nTopics =
 92 |     NetData { dHypers           = hp
 93 |             , dItems            = M.unions $ map M.fromList $ M.elems nodeItems
 94 |             , dTopics           = S.fromList [Topic i | i <- [1..nTopics]]
 95 |             , dNodeItems        = M.fromList
 96 |                                   $ zip [NodeItem i | i <- [0..]]
 97 |                                   $ do (n,items) <- M.assocs nodeItems
 98 |                                        (item,weight) <- items
 99 |                                        return (n, item)
100 |             , dNodes            = M.keysSet nodeItems
101 |             }
102 | 
103 | opts :: ParserInfo RunOpts
104 | opts = info runOpts (  fullDesc
105 |                     <> progDesc "Learn LDA model"
106 |                     <> header "run-lda - learn LDA model"
107 |                     )
108 | 
109 | instance Sampler.SamplerModel MState where
110 |     estimateHypers = reestimate
111 |     modelLikelihood = modelLikelihood
112 |     summarizeHypers ms =
113 |         "  phi  : "++show (dmAlpha $ snd $ M.findMin $ stPhis ms)++"\n"++
114 |         "  theta: "++show (dmAlpha $ snd $ M.findMin $ stThetas ms)++"\n"
115 | 
116 | main :: IO ()
117 | main = do
118 |     args <- execParser opts
119 |     stopWords <- case stopwords args of
120 |                      Just f  -> S.fromList . T.words <$> TIO.readFile f
121 |                      Nothing -> return S.empty
122 |     printf "Read %d stopwords\n" (S.size stopWords)
123 | 
124 |     let parseError s = do
125 |             putStrLn $ "Error parsing nodes: "++s
126 |             putStrLn $ "Expected format is:"
127 |             putStrLn $ "(node)  (term) (weight)  (term) (weight) ..."
128 |             fail ""
129 |     (nodeItems, (itemMap, nodeMap)) <- either parseError (return . termsToItems)
130 |                             =<< readNodeItems stopWords (nodesFile args)
131 | 
132 |     let sweepsDir = Sampler.sweepsDir $ samplerOpts args
133 |     Sampler.createSweeps $ samplerOpts args
134 |     encodeFile (sweepsDir </> "item-map") itemMap
135 |     encodeFile (sweepsDir </> "node-map") nodeMap
136 | 
137 |     let termCounts = V.fromListN (M.size nodeItems)
138 |                      $ map length $ M.elems nodeItems :: Vector Int
139 |     printf "Read %d nodes\n" (M.size nodeItems)
140 |     printf "Mean items per node:  %1.2f\n" (mean $ V.map realToFrac termCounts)
141 | 
142 |     withSystemRandom $ \mwc->do
143 |     let nd = netData (hyperParams args) nodeItems (nTopics args)
144 |     encodeFile (sweepsDir </> "data") nd
145 |     mInit <- runRVar (randomInitialize nd) mwc
146 |     let m = model nd mInit
147 |     Sampler.runSampler (samplerOpts args) m (updateUnits nd)
148 |     return ()
149 | 


--------------------------------------------------------------------------------
/network-topic-models/RunST.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BangPatterns, GeneralizedNewtypeDeriving, StandaloneDeriving #-}
  2 | 
  3 | import           Prelude hiding (mapM)
  4 | 
  5 | import           Options.Applicative
  6 | import           Data.Monoid ((<>))
  7 | import           Control.Monad.Trans.Class
  8 | 
  9 | import           Data.Vector (Vector)
 10 | import qualified Data.Vector.Generic as V
 11 | import           Statistics.Sample (mean)
 12 | 
 13 | import           Data.Traversable (mapM)
 14 | import qualified Data.Set as S
 15 | import           Data.Set (Set)
 16 | import qualified Data.Map as M
 17 | 
 18 | import           ReadData
 19 | import           SerializeText
 20 | import qualified RunSampler as Sampler
 21 | import           BayesStack.DirMulti
 22 | import           BayesStack.Models.Topic.SharedTaste
 23 | import           BayesStack.UniqueKey
 24 | 
 25 | import qualified Data.Text as T
 26 | import qualified Data.Text.IO as TIO
 27 | 
 28 | import           System.FilePath.Posix ((</>))
 29 | import           Data.Binary
 30 | import qualified Data.ByteString as BS
 31 | import           Text.Printf
 32 | 
 33 | import           Data.Random
 34 | import           System.Random.MWC
 35 | 
 36 | data RunOpts = RunOpts { arcsFile        :: FilePath
 37 |                        , nodesFile       :: FilePath
 38 |                        , stopwords       :: Maybe FilePath
 39 |                        , nTopics         :: Int
 40 |                        , samplerOpts     :: Sampler.SamplerOpts
 41 |                        , hyperParams     :: HyperParams
 42 |                        }
 43 | 
 44 | runOpts = RunOpts
 45 |     <$> strOption  ( long "edges"
 46 |                   <> short 'e'
 47 |                   <> metavar "FILE"
 48 |                   <> help "File containing edges"
 49 |                    )
 50 |     <*> strOption  ( long "nodes"
 51 |                   <> short 'n'
 52 |                   <> metavar "FILE"
 53 |                   <> help "File containing nodes' items"
 54 |                    )
 55 |     <*> nullOption ( long "stopwords"
 56 |                   <> short 's'
 57 |                   <> metavar "FILE"
 58 |                   <> reader (pure . Just)
 59 |                   <> value Nothing
 60 |                   <> help "Stop words list"
 61 |                    )
 62 |     <*> option     ( long "topics"
 63 |                   <> short 't'
 64 |                   <> metavar "N"
 65 |                   <> value 20
 66 |                   <> help "Number of topics"
 67 |                    )
 68 |     <*> Sampler.samplerOpts
 69 |     <*> hyperOpts
 70 | 
 71 | hyperOpts = HyperParams
 72 |     <$> option     ( long "prior-psi"
 73 |                   <> value 1
 74 |                   <> help "Dirichlet parameter for prior on psi"
 75 |                    )
 76 |     <*> option     ( long "prior-lambda"
 77 |                   <> value 0.1
 78 |                   <> help "Dirichlet parameter for prior on lambda"
 79 |                    )
 80 |     <*> option     ( long "prior-phi"
 81 |                   <> value 0.01
 82 |                   <> help "Dirichlet parameter for prior on phi"
 83 |                    )
 84 |     <*> option     ( long "prior-omega"
 85 |                   <> value 0.01
 86 |                   <> help "Dirichlet parameter for prior on omega"
 87 |                    )
 88 |     <*> option     ( long "prior-gamma-shared"
 89 |                   <> value 0.9
 90 |                   <> help "Beta parameter for prior on gamma (shared)"
 91 |                    )
 92 |     <*> option     ( long "prior-gamma-own"
 93 |                   <> value 0.1
 94 |                   <> help "Beta parameter for prior on gamma (own)"
 95 |                    )
 96 | 
 97 | mapMKeys :: (Ord k, Ord k', Monad m, Applicative m)
 98 |          => (a -> m a') -> (k -> m k') -> M.Map k a -> m (M.Map k' a')
 99 | mapMKeys f g x = M.fromList <$> (mapM (\(k,v)->(,) <$> g k <*> f v) $ M.assocs x)
100 | 
101 | termsToItems :: M.Map NodeName [Term] -> Set (NodeName, NodeName)
102 |              -> ( (M.Map Node [Item], Set (Node, Node))
103 |                 , (M.Map Item Term, M.Map Node NodeName))
104 | termsToItems nodes arcs =
105 |     let ((d', nodeMap), itemMap) =
106 |             runUniqueKey' [Item i | i <- [0..]] $
107 |             runUniqueKeyT' [Node i | i <- [0..]] $ do
108 |                 a <- mapMKeys (mapM (lift . getUniqueKey)) getUniqueKey nodes
109 |                 b <- S.fromList <$> mapM (\(x,y)->(,) <$> getUniqueKey x <*> getUniqueKey y)
110 |                      (S.toList arcs)
111 |                 return (a,b)
112 |     in (d', (itemMap, nodeMap))
113 | 
114 | netData :: HyperParams -> M.Map Node [Item] -> Set Edge -> Int -> NetData
115 | netData hp nodeItems edges nTopics =
116 |     NetData { dHypers           = hp
117 |             , dEdges            = edges
118 |             , dItems            = S.unions $ map S.fromList $ M.elems nodeItems
119 |             , dTopics           = S.fromList [Topic i | i <- [1..nTopics]]
120 |             , dNodeItems        = M.fromList
121 |                                   $ zip [NodeItem i | i <- [0..]]
122 |                                   $ do (n,items) <- M.assocs nodeItems
123 |                                        item <- items
124 |                                        return (n, item)
125 |             }
126 | 
127 | opts = info runOpts
128 |            (  fullDesc
129 |            <> progDesc "Learn shared taste model"
130 |            <> header "run-st - learn shared taste model"
131 |            )
132 | 
133 | instance Sampler.SamplerModel MState where
134 |     estimateHypers = id -- reestimate -- FIXME
135 |     modelLikelihood = modelLikelihood
136 |     summarizeHypers ms =  "" -- FIXME
137 | 
138 | main = do
139 |     args <- execParser opts
140 |     stopWords <- case stopwords args of
141 |                      Just f  -> S.fromList . T.words <$> TIO.readFile f
142 |                      Nothing -> return S.empty
143 |     printf "Read %d stopwords\n" (S.size stopWords)
144 | 
145 |     ((nodeItems, a), (itemMap, nodeMap)) <- termsToItems
146 |                             <$> readNodeItems stopWords (nodesFile args)
147 |                             <*> readEdges (arcsFile args)
148 |     let edges = S.map Edge a
149 | 
150 |     Sampler.createSweeps $ samplerOpts args
151 |     let sweepsDir = Sampler.sweepsDir $ samplerOpts args
152 |     encodeFile (sweepsDir </> "item-map") itemMap
153 |     encodeFile (sweepsDir </> "node-map") nodeMap
154 | 
155 |     let termCounts = V.fromListN (M.size nodeItems)
156 |                      $ map length $ M.elems nodeItems :: Vector Int
157 |     printf "Read %d edges, %d items\n" (S.size edges) (M.size nodeItems)
158 |     printf "Mean items per node:  %1.2f\n" (mean $ V.map realToFrac termCounts)
159 | 
160 |     withSystemRandom $ \mwc->do
161 |     let nd = netData (hyperParams args) nodeItems edges 10
162 |     encodeFile (sweepsDir </> "data") nd
163 |     mInit <- runRVar (randomInitialize nd) mwc
164 |     let m = model nd mInit
165 |     Sampler.runSampler (samplerOpts args) m (updateUnits nd)
166 |     return ()
167 | 


--------------------------------------------------------------------------------
/network-topic-models/RunSampler.hs:
--------------------------------------------------------------------------------
  1 | module RunSampler ( SamplerModel (..)
  2 |                   , SamplerOpts (..), samplerOpts
  3 |                   , runSampler
  4 |                   , createSweeps
  5 |                   ) where
  6 | 
  7 | import           Options.Applicative
  8 | import           Data.Monoid ((<>))
  9 | import           System.FilePath.Posix ((</>))
 10 | import           System.Directory
 11 | 
 12 | import           Control.Monad (when, forM_, void)
 13 | import qualified Control.Monad.Trans.State as S
 14 | import           Control.Monad.IO.Class
 15 | 
 16 | import           Data.Binary
 17 | import qualified Data.ByteString as BS
 18 | import           Text.Printf
 19 | 
 20 | import           Control.Concurrent
 21 | import           Control.Concurrent.STM
 22 | 
 23 | import           System.Random.MWC
 24 | import           Data.Random
 25 | 
 26 | import           Numeric.Log
 27 | import           BayesStack.Gibbs.Concurrent
 28 | 
 29 | data SamplerOpts = SamplerOpts { burnin          :: Int
 30 |                                , lag             :: Int
 31 |                                , iterations      :: Maybe Int
 32 |                                , updateBlock     :: Int
 33 |                                , sweepsDir       :: FilePath
 34 |                                , nCaps           :: Int
 35 |                                , hyperEstOpts    :: HyperEstOpts
 36 |                                }
 37 | 
 38 | samplerOpts = SamplerOpts
 39 |     <$> option     ( long "burnin"
 40 |                   <> short 'b'
 41 |                   <> metavar "N"
 42 |                   <> value 100
 43 |                   <> help "Number of sweeps to run before taking samples"
 44 |                    )
 45 |     <*> option     ( long "lag"
 46 |                   <> short 'l'
 47 |                   <> metavar "N"
 48 |                   <> value 10
 49 |                   <> help "Number of sweeps between diagnostic samples"
 50 |                    )
 51 |     <*> option     ( long "iterations"
 52 |                   <> short 'i'
 53 |                   <> metavar "N"
 54 |                   <> value Nothing
 55 |                   <> reader (pure . auto)
 56 |                   <> help "Number of sweeps to run for"
 57 |                    )
 58 |     <*> option     ( long "diff-batch"
 59 |                   <> short 'u'
 60 |                   <> metavar "N"
 61 |                   <> value 100
 62 |                   <> help "Number of update diffs to batch before updating global state"
 63 |                    )
 64 |     <*> strOption  ( long "sweeps"
 65 |                   <> short 'S'
 66 |                   <> metavar "DIR"
 67 |                   <> value "sweeps"
 68 |                   <> help "Directory in which to place model state output"
 69 |                    )
 70 |     <*> option     ( long "threads"
 71 |                   <> short 'N'
 72 |                   <> value 1
 73 |                   <> metavar "INT"
 74 |                   <> help "Number of worker threads to start"
 75 |                    )
 76 |     <*> hyperEstOpts'
 77 | 
 78 | data HyperEstOpts = HyperEstOpts { hyperEst       :: Bool
 79 |                                  , hyperBurnin    :: Int
 80 |                                  , hyperLag       :: Int
 81 |                                  }
 82 | 
 83 | hyperEstOpts' = HyperEstOpts
 84 |     <$> switch     ( long "hyper"
 85 |                   <> short 'H'
 86 |                   <> help "Enable hyperparameter estimation"
 87 |                    )
 88 |     <*> option     ( long "hyper-burnin"
 89 |                   <> metavar "N"
 90 |                   <> value 10
 91 |                   <> help "Number of sweeps before starting hyperparameter estimations (must be multiple of --lag)"
 92 |                    )
 93 |     <*> option     ( long "hyper-lag"
 94 |                   <> metavar "L"
 95 |                   <> value 10
 96 |                   <> help "Number of sweeps between hyperparameter estimations (must be multiple of --lag)"
 97 |                    )
 98 | 
 99 | class Binary ms => SamplerModel ms where
100 |     modelLikelihood :: ms -> Log Double
101 |     estimateHypers :: ms -> ms
102 |     summarizeHypers :: ms -> String
103 | 
104 | processSweep :: SamplerModel ms => SamplerOpts -> TVar (Log Double) -> Int -> ms -> IO ()
105 | processSweep opts lastMaxV sweepN m = do
106 |     let l = modelLikelihood m
107 |     putStr $ printf "Sweep %d: %f\n" sweepN (ln l)
108 |     appendFile (sweepsDir opts </> "likelihood.log")
109 |         $ printf "%d\t%f\n" sweepN (ln l)
110 |     when (sweepN >= burnin opts) $ do
111 |         newMax <- atomically $ do oldL <- readTVar lastMaxV
112 |                                   if l > oldL then writeTVar lastMaxV l >> return True
113 |                                               else return False
114 |         when (newMax && sweepN >= burnin opts) $
115 |             let fname = sweepsDir opts </> printf "%05d.state" sweepN
116 |             in encodeFile fname m
117 | 
118 | doEstimateHypers :: SamplerModel ms => SamplerOpts -> Int -> S.StateT ms IO ()
119 | doEstimateHypers opts@(SamplerOpts {hyperEstOpts=HyperEstOpts True burnin lag}) iterN
120 |     | iterN >= burnin && iterN `mod` lag == 0  = do
121 |         liftIO $ putStrLn "Parameter estimation"
122 |         m <- S.get
123 |         S.modify estimateHypers
124 |         m' <- S.get
125 |         void $ liftIO $ forkIO
126 |             $ appendFile (sweepsDir opts </> "hyperparams.log")
127 |             $ printf "%5d\t%f\t%f\t%s\n"
128 |                   iterN
129 |                   (ln $ modelLikelihood m)
130 |                   (ln $ modelLikelihood m')
131 |                   (summarizeHypers m')
132 | doEstimateHypers _ _  = return ()
133 | 
134 | withSystemRandomIO = withSystemRandom :: (GenIO -> IO a) -> IO a
135 | 
136 | samplerIter :: SamplerModel ms => SamplerOpts -> [WrappedUpdateUnit ms]
137 |             -> TMVar () -> TVar (Log Double)
138 |             -> Int -> S.StateT ms IO ()
139 | samplerIter opts uus processSweepRunning lastMaxV lagN = do
140 |     let sweepN = lagN * lag opts
141 |     shuffledUus <- liftIO $ withSystemRandomIO $ \mwc->runRVar (shuffle uus) mwc
142 |     let uus' = concat $ replicate (lag opts) shuffledUus
143 |     m <- S.get
144 |     S.put =<< liftIO (gibbsUpdate (nCaps opts) (updateBlock opts) m uus')
145 |     when (sweepN == burnin opts) $ liftIO $ putStrLn "Burn-in complete"
146 |     S.get >>= \m->do liftIO $ atomically $ takeTMVar processSweepRunning
147 |                      void $ liftIO $ forkIO $ do
148 |                          processSweep opts lastMaxV sweepN m
149 |                          liftIO $ atomically $ putTMVar processSweepRunning ()
150 | 
151 |     doEstimateHypers opts sweepN
152 | 
153 | checkOpts :: SamplerOpts -> IO ()
154 | checkOpts opts = do
155 |     let hyperOpts = hyperEstOpts opts
156 |     when (burnin opts `mod` lag opts /= 0)
157 |         $ error "--burnin must be multiple of --lag"
158 |     when (hyperEst hyperOpts && hyperBurnin hyperOpts `mod` lag opts /= 0)
159 |         $ error "--hyper-burnin must be multiple of --lag"
160 |     when (hyperEst hyperOpts && hyperLag hyperOpts `mod` lag opts /= 0)
161 |         $ error "--hyper-lag must be multiple of --lag"
162 | 
163 | runSampler :: SamplerModel ms => SamplerOpts -> ms -> [WrappedUpdateUnit ms] -> IO ()
164 | runSampler opts m uus = do
165 |     checkOpts opts
166 |     setNumCapabilities (nCaps opts)
167 |     createDirectoryIfMissing False (sweepsDir opts)
168 |     putStrLn "Starting sampler..."
169 |     putStrLn $ "Initial likelihood = "++show (ln $ modelLikelihood m)
170 |     putStrLn $ "Burning in for "++show (burnin opts)++" samples"
171 |     let lagNs = maybe [0..] (\n->[0..n `div` lag opts]) $ iterations opts
172 |     lastMaxV <- atomically $ newTVar 0
173 |     processSweepRunning <- atomically $ newTMVar ()
174 |     void $ S.runStateT (forM_ lagNs (samplerIter opts uus processSweepRunning lastMaxV)) m
175 |     atomically $ takeTMVar processSweepRunning
176 | 
177 | createSweeps :: SamplerOpts -> IO ()
178 | createSweeps (SamplerOpts {sweepsDir=sweepsDir}) = do
179 |     exists <- doesDirectoryExist sweepsDir
180 |     if exists
181 |       then error "Sweeps directory already exists"
182 |       else createDirectory sweepsDir
183 | 


--------------------------------------------------------------------------------
/network-topic-models/SerializeText.hs:
--------------------------------------------------------------------------------
 1 | module SerializeText () where
 2 | 
 3 | import           Control.Applicative
 4 | import           Data.Binary
 5 | import qualified Data.Text as T
 6 | import qualified Data.Text.Encoding as TE
 7 | 
 8 | -- FIXME: Why isn't there already an instance?
 9 | instance Binary T.Text where
10 |      put = put . TE.encodeUtf8
11 |      get = TE.decodeUtf8 <$> get
12 | 


--------------------------------------------------------------------------------
/network-topic-models/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 | 


--------------------------------------------------------------------------------
/network-topic-models/TopicLanguageModel.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | 
  3 | import qualified Data.ByteString as BS
  4 | import qualified Data.Text as Text
  5 | import qualified Data.Foldable as F
  6 | import           Data.List (sort, sortBy, isSuffixOf)
  7 | import           Data.Function
  8 | import           Data.Map (Map)
  9 | import qualified Data.Map as M
 10 | import           Data.Monoid
 11 | import           Data.Tuple (swap)
 12 | import qualified Data.Traversable as T
 13 | import           Options.Applicative
 14 | 
 15 | import qualified Data.Text.Lazy.IO as TL
 16 | import           Data.Text.Lazy.Builder.Int
 17 | import qualified Data.Text.Lazy.Builder as TB
 18 | import           Data.Binary
 19 | 
 20 | import           System.Directory
 21 | import           System.FilePath ((</>), takeDirectory)
 22 | import           Text.Printf
 23 | 
 24 | import           BayesStack.Types
 25 | import           BayesStack.DirMulti
 26 | import           BayesStack.Models.Topic.LDARelevance
 27 | import           SerializeText
 28 | import           ReadData
 29 | import           FormatMultinom
 30 | 
 31 | data Opts = Opts { inputFiles   :: [FilePath]
 32 |                  , sweepNum     :: Int
 33 |                  , documents    :: [NodeName]
 34 |                  , topN         :: Maybe Int
 35 |                  }
 36 | 
 37 | opts = Opts
 38 |     <$> arguments pure
 39 |                   ( metavar "DIR|FILE"
 40 |                  <> help "The directory of sweeps to dump"
 41 |                   )
 42 |     <*> option       ( long "number"
 43 |                     <> short 'N'
 44 |                     <> value 10
 45 |                     <> metavar "N"
 46 |                     <> help "The number of sweeps to aggregate"
 47 |                      )
 48 |     <*> option       ( long "document"
 49 |                     <> short 'd'
 50 |                     <> reader (fmap ((:[]) . Text.pack) . str)
 51 |                     <> help "The documents to dump"
 52 |                      )
 53 |     <*> option       ( long "top"
 54 |                     <> short 'n'
 55 |                     <> value Nothing
 56 |                     <> reader (pure . auto)
 57 |                     <> metavar "N"
 58 |                     <> help "How many items to list for each"
 59 |                      )
 60 | 
 61 | readSweep :: FilePath -> IO MState
 62 | readSweep = decodeFile
 63 | 
 64 | readNetData :: FilePath -> IO NetData
 65 | readNetData = decodeFile
 66 | 
 67 | readPhi' :: Node -> FilePath -> IO (Map Item Double)
 68 | readPhi' n filePath = f <$> readSweep filePath
 69 |   where f s = M.unionsWith (+) $ do
 70 |             (pt,t) <- F.toList $ probabilities $ stThetas s M.! n
 71 |             (px,x) <- F.toList $ probabilities $ stPhis s M.! t
 72 |             return $ M.singleton x (pt*px)
 73 | 
 74 | readPhi :: [NodeName] -> FilePath -> IO (Map NodeName (Map Term Double))
 75 | readPhi nodes filePath = do
 76 |     let sweepDir = takeDirectory filePath
 77 |     itemMap <- readItemMap sweepDir
 78 |     nodeMap <- readNodeMap sweepDir
 79 |     let invNodeMap = M.fromList $ map swap $ M.assocs nodeMap
 80 |     let f n = let n' = invNodeMap M.! n
 81 |               in M.singleton n . M.mapKeys (itemMap M.!) <$> readPhi' n' filePath
 82 |     M.unionsWith (M.unionWith (+)) <$> mapM f nodes
 83 | 
 84 | getSweeps :: FilePath -> IO [FilePath]
 85 | getSweeps sweepsDir =
 86 |     map (sweepsDir </>) . sort . filter (".state" `isSuffixOf`)
 87 |     <$> getDirectoryContents sweepsDir
 88 | 
 89 | main = do
 90 |     args <- execParser $ info (helper <*> opts)
 91 |          ( fullDesc
 92 |         <> progDesc "Dump distributions from an LDA sweep"
 93 |         <> header "dump-lda - Dump distributions from an LDA sweep"
 94 |          )
 95 | 
 96 |     --nd <- readNetData $ sweepDir args </> "data"
 97 | 
 98 |     files <- T.forM (inputFiles args) $ \fname->do
 99 |         isDir <- doesDirectoryExist fname
100 |         isFile <- doesFileExist fname
101 |         case () of
102 |             _ | isFile    ->
103 |                 return [fname]
104 |             _ | isDir     ->
105 |                 take (sweepNum args) . reverse <$> getSweeps fname
106 |             _ | otherwise -> error "oops"
107 | 
108 |     sweeps <- T.forM (concat files) $ readPhi (documents args)
109 |     let sweeps' = M.unionsWith (M.unionWith (+)) sweeps
110 |                 :: Map NodeName (Map Term Double)
111 |         takeTopN = maybe id take $ topN args
112 |     F.forM_ (M.assocs sweeps') $ \(n,items)->do
113 |         print n
114 |         F.forM_ (takeTopN $ sortBy (flip compare `on` snd) $ M.assocs items) $ \(x,p)->do
115 |             printf "\t%s\t%1.2e\n" (show x) p
116 |         putStrLn ""
117 | 


--------------------------------------------------------------------------------
/network-topic-models/doc/models/model_ci.tex:
--------------------------------------------------------------------------------
  1 | % model_citation_influence.tex
  2 | %
  3 | % Copyright (C) 2010,2011 Laura Dietz
  4 | % Copyright (C) 2012 Jaakko Luttinen
  5 | %
  6 | % This file may be distributed and/or modified
  7 | %
  8 | % 1. under the LaTeX Project Public License and/or
  9 | % 2. under the GNU General Public License.
 10 | %
 11 | % See the files LICENSE_LPPL and LICENSE_GPL for more details.
 12 | 
 13 | % Citation influence model
 14 | % Cite this model as 
 15 | % Laura Dietz, Steffen Bickel, Tobias Scheffer. 
 16 | % Unsupervised Prediction of Citation Influences. 
 17 | % In: Proceedings of International Conference on Machine Learning. 2007
 18 | 
 19 | %\beginpgfgraphicnamed{model-citation-influence}
 20 | \begin{tikzpicture}
 21 | 
 22 |   % Layout the variables
 23 |   \matrix[row sep=0.5cm, column sep=1.2cm] (LDA)
 24 |   { %
 25 |     & %
 26 |     & %
 27 |     \node[latent] (psi)    {$\psi$} ; & %
 28 |     \node[latent] (gamma)  {$\gamma$} ; & %
 29 |     \node[latent] (omega)  {$\Omega$} ; & %
 30 |     \\
 31 |     \\
 32 |     \node[latent] (lambda) {$\lambda$} ; & %
 33 |     & %
 34 |     \node[latent] (C)      {$C$} ; & %
 35 |     \node[latent] (S)      {$S$} ; & %
 36 |     \\
 37 |     & %
 38 |     & %
 39 |     \factor       {T-f1}   {Multi} {} {}; & %
 40 |     & %
 41 |     \factor       {T-f2}   {below:Multi} {} {}; & %
 42 |     \\
 43 |     \node[latent] (T')     {$T'$} ; & %
 44 |     & %
 45 |     \node[latent] (T)      {$T$} ; & %
 46 |     \\
 47 |     \factor       {X'-f}   {Multi} {} {}; & %
 48 |     \node[latent] (phi)    {$\phi$} ; & %
 49 |     \factor       {X-f}    {Multi} {} {}; %
 50 |     \\
 51 |     \node[obs] (X')        {$X'$} ; & %
 52 |     & %
 53 |     \node[obs] (X)         {$X$} ;
 54 |     \\
 55 |   };
 56 | 
 57 |   % Remaining factors
 58 |   \factor[above=of T']     {T'-f}     {left:Multi} {} {}; %
 59 |   \factor[above=of lambda] {lambda-f} {left:Dir} {} {}; %
 60 |   \factor[above=of C]      {C-f}      {left:Multi} {} {}; %
 61 |   \factor[above=of S]      {S-f}      {left:Bern} {} {}; %
 62 |   \factor[above=of psi]    {psi-f}    {left:Dir} {} {}; %
 63 |   \factor[above=of gamma]  {gamma-f}  {left:Beta} {} {}; %
 64 |   \factor[above=of omega]  {omega-f}  {left:Dir} {} {}; %
 65 |   \factor[above=of phi]    {phi-f}    {left:Dir} {} {}; %
 66 | 
 67 |   % Hyperparameters
 68 |   \node[const, above=of lambda] (alambda) {$\alpha_\lambda$}; %
 69 |   \node[const, above=of phi]    (aphi)    {$\alpha_\phi$}; %
 70 |   \node[const, above=of psi]    (apsi)    {$\alpha_\psi$}; %
 71 |   \node[const, above=of gamma, xshift=-0.5cm] (agamma1)
 72 |   {$\alpha_{\gamma_\omega}$}; %
 73 |   \node[const, above=of gamma, xshift=0.5cm] (agamma2)
 74 |   {$\alpha_{\gamma_\psi}$}; %
 75 |   \node[const, above=of omega]  (aomega)    {$\alpha_\omega$}; %
 76 | 
 77 |   % Factor connections
 78 |   \factoredge {phi}               {X'-f}     {X'} ; %
 79 |   \factoredge {phi}               {X-f}      {X} ; %
 80 |   \factoredge {lambda}            {T'-f}     {T'} ; %
 81 |   \factoredge {lambda}            {T-f1}     {T} ; %
 82 |   \factoredge {omega}             {T-f2}     {T} ; %
 83 |   \factoredge {alambda}           {lambda-f} {lambda} ; %
 84 |   \factoredge {psi}               {C-f}      {C} ; %
 85 |   \factoredge {gamma}             {S-f}      {S} ; %
 86 |   \factoredge {apsi}              {psi-f}    {psi} ; %
 87 |   \factoredge {agamma1,agamma2}   {gamma-f}  {gamma} ; %
 88 |   \factoredge {aomega}            {omega-f}  {omega} ; %
 89 |   \factoredge {aphi}              {phi-f}    {phi} ; %
 90 | 
 91 |   % Gates
 92 |   \gate {X'-gate} {(X'-f)(X'-f-caption)} {T'} ; %
 93 |   \gate {X-gate}  {(X-f)(X-f-caption)}   {T} ; %
 94 |   \gate {T-gate}  {(T-f1)(T-f1-caption)} {C} ;
 95 |   \vgate {T-vgate} %
 96 |   {(T-gate)} {$S=0$} %
 97 |   {(T-f2)(T-f2-caption)} {$S=1$} %
 98 |   {S} ; %
 99 | 
100 |   % Plates
101 |   \plate {LDA1} { %
102 |     (T')(T'-f)(T'-f-caption) %
103 |     (X')(X'-gate) %
104 |   } {$\forall w' \in c$} ; %
105 | 
106 |   \plate {LDA2} { %
107 |     (LDA1) %
108 |     (lambda)(lambda-f)(lambda-f-caption) %
109 |   } {$\forall c \in \mathcal{C}$} ; %
110 | 
111 |   \plate {} { %
112 |     (phi)(phi-f)(phi-f-caption) %
113 |   } {$\forall t \in \mathcal{T}$} ; %
114 | 
115 |   \plate {P1} { %
116 |     (X)(X-gate) %
117 |     (T)(T-vgate) %
118 |     (C)(C-f)(C-f-caption) %
119 |     (S)(S-f)(S-f-caption) %
120 |   } {$\forall w \in d$} ;
121 | 
122 |   \plate {} { %
123 |     (P1) %
124 |     (psi)(psi-f)(psi-f-caption) %
125 |     (gamma)(gamma-f)(gamma-f-caption) %
126 |     (omega)(omega-f)(omega-f-caption) %
127 |   } {$\forall d \in \mathcal{D}$} ; %
128 | 
129 | \end{tikzpicture}
130 | %\endpgfgraphicnamed
131 | 
132 | %%% Local Variables: 
133 | %%% mode: tex-pdf
134 | %%% TeX-master: "example"
135 | %%% End: 
136 | 


--------------------------------------------------------------------------------
/network-topic-models/doc/models/model_ci_nt.tex:
--------------------------------------------------------------------------------
  1 | % model_citation_influence.tex
  2 | %
  3 | % Copyright (C) 2010,2011 Laura Dietz
  4 | % Copyright (C) 2012 Jaakko Luttinen
  5 | %
  6 | % This file may be distributed and/or modified
  7 | %
  8 | % 1. under the LaTeX Project Public License and/or
  9 | % 2. under the GNU General Public License.
 10 | %
 11 | % See the files LICENSE_LPPL and LICENSE_GPL for more details.
 12 | 
 13 | % Citation influence model
 14 | % Cite this model as 
 15 | % Laura Dietz, Steffen Bickel, Tobias Scheffer. 
 16 | % Unsupervised Prediction of Citation Influences. 
 17 | % In: Proceedings of International Conference on Machine Learning. 2007
 18 | 
 19 | %\beginpgfgraphicnamed{model-citation-influence}
 20 | \begin{tikzpicture}
 21 | 
 22 |   % Layout the variables
 23 |   \matrix[row sep=0.5cm, column sep=1.2cm] (LDA)
 24 |   { %
 25 |     & %
 26 |     & %
 27 |     \node[latent] (psi)    {$\psi$} ; & %
 28 |     \node[latent] (gamma)  {$\gamma$} ; & %
 29 |     \node[latent] (omega)  {$\Omega$} ; & %
 30 |     \\
 31 |     \\
 32 |     \node[latent] (lambda) {$\lambda$} ; & %
 33 |     & %
 34 |     \node[latent] (C)      {$C$} ; & %
 35 |     \node[latent] (S)      {$S$} ; & %
 36 |     \\
 37 |     \factor       {X'-f}   {left:Multi} {} {}; & %
 38 |     & %
 39 |     \factor       {X-f1}   {Multi} {} {}; & %
 40 |     & %
 41 |     \factor       {X-f2}   {below:Multi} {} {}; & %
 42 |     \\
 43 |     \node[obs] (X')        {$X'$} ; & %
 44 |     & %
 45 |     \node[obs] (X)         {$X$} ;
 46 |     \\
 47 |   };
 48 | 
 49 |   % Remaining factors
 50 |   \factor[above=of lambda] {lambda-f} {left:Dir} {} {}; %
 51 |   \factor[above=of C]      {C-f}      {left:Multi} {} {}; %
 52 |   \factor[above=of S]      {S-f}      {left:Bern} {} {}; %
 53 |   \factor[above=of psi]    {psi-f}    {left:Dir} {} {}; %
 54 |   \factor[above=of gamma]  {gamma-f}  {left:Beta} {} {}; %
 55 |   \factor[above=of omega]  {omega-f}  {left:Dir} {} {}; %
 56 | 
 57 |   % Hyperparameters
 58 |   \node[const, above=of lambda] (alambda) {$\alpha_\lambda$}; %
 59 |   \node[const, above=of psi]    (apsi)    {$\alpha_\psi$}; %
 60 |   \node[const, above=of gamma, xshift=-0.5cm] (agamma1)
 61 |   {$\alpha_{\gamma_\omega}$}; %
 62 |   \node[const, above=of gamma, xshift=0.5cm] (agamma2)
 63 |   {$\alpha_{\gamma_\psi}$}; %
 64 |   \node[const, above=of omega]  (aomega)    {$\alpha_\omega$}; %
 65 | 
 66 |   % Factor connections
 67 |   \factoredge {lambda}            {X'-f}     {X'} ; %
 68 |   \factoredge {lambda}            {X-f1}     {X} ; %
 69 |   \factoredge {omega}             {X-f2}     {X} ; %
 70 |   \factoredge {alambda}           {lambda-f} {lambda} ; %
 71 |   \factoredge {psi}               {C-f}      {C} ; %
 72 |   \factoredge {gamma}             {S-f}      {S} ; %
 73 |   \factoredge {apsi}              {psi-f}    {psi} ; %
 74 |   \factoredge {agamma1,agamma2}   {gamma-f}  {gamma} ; %
 75 |   \factoredge {aomega}            {omega-f}  {omega} ; %
 76 | 
 77 |   % Gates
 78 |   \gate {X-gate}  {(X-f1)(X-f1-caption)} {C} ;
 79 |   \vgate {X-vgate} %
 80 |   {(X-gate)} {$S=0$} %
 81 |   {(X-f2)(X-f2-caption)} {$S=1$} %
 82 |   {S} ; %
 83 | 
 84 |   % Plates
 85 |   \plate {LDA1} { %
 86 |     (X'-f)(X'-f-caption) %
 87 |     (X')(X'-gate) %
 88 |   } {$\forall w' \in c$} ; %
 89 | 
 90 |   \plate {LDA2} { %
 91 |     (LDA1) %
 92 |     (lambda)(lambda-f)(lambda-f-caption) %
 93 |   } {$\forall c \in \mathcal{C}$} ; %
 94 | 
 95 |   \plate {P1} { %
 96 |     (X)(X-gate) %
 97 |     (T)(T-vgate) %
 98 |     (C)(C-f)(C-f-caption) %
 99 |     (S)(S-f)(S-f-caption) %
100 |   } {$\forall w \in d$} ;
101 | 
102 |   \plate {} { %
103 |     (P1) %
104 |     (psi)(psi-f)(psi-f-caption) %
105 |     (gamma)(gamma-f)(gamma-f-caption) %
106 |     (omega)(omega-f)(omega-f-caption) %
107 |   } {$\forall d \in \mathcal{D}$} ; %
108 | 
109 | \end{tikzpicture}
110 | %\endpgfgraphicnamed
111 | 
112 | %%% Local Variables: 
113 | %%% mode: tex-pdf
114 | %%% TeX-master: "example"
115 | %%% End: 
116 | 


--------------------------------------------------------------------------------
/network-topic-models/doc/models/model_lda.tex:
--------------------------------------------------------------------------------
 1 | % model_lda.tex
 2 | %
 3 | % Copyright (C) 2010,2011 Laura Dietz
 4 | % Copyright (C) 2012 Jaakko Luttinen
 5 | %
 6 | % This file may be distributed and/or modified
 7 | %
 8 | % 1. under the LaTeX Project Public License and/or
 9 | % 2. under the GNU General Public License.
10 | %
11 | % See the files LICENSE_LPPL and LICENSE_GPL for more details.
12 | 
13 | % Latent Diriclet allocation model
14 | 
15 | %\beginpgfgraphicnamed{model-lda}
16 | \begin{tikzpicture}[x=1.7cm,y=1.8cm]
17 | 
18 |   % Nodes
19 | 
20 |   \node[obs]                   (X)      {$X$} ; %
21 |   \node[latent, above=of X]    (T)      {$T$} ; %
22 |   \node[latent, above=of T]    (theta)  {$\theta$}; %
23 |   \node[const, above=of theta] (atheta) {$\alpha_\theta$};
24 | 
25 | 
26 |   % Factors
27 |   \factor[above=of X]     {X-f}     {Multi} {} {} ; %
28 |   \factor[above=of T]     {T-f}     {left:Multi} {} {} ; %
29 |   \factor[above=of theta] {theta-f} {left:Dir} {} {} ; %
30 | 
31 |   % More nodes
32 |   \node[latent, right=of X-f] (phi)  {$\phi$}; %
33 |   \node[const, above=of phi]  (aphi) {$\alpha_\phi$}; %
34 | 
35 |   \factor[above=of phi] {phi-f} {right:Dir} {} {} ; %
36 | 
37 |   \factoredge {theta}  {T-f}     {T} ; %
38 |   \factoredge {atheta} {theta-f} {theta} ; %
39 |   \factoredge {phi}    {X-f}     {X} ; %
40 |   \factoredge {aphi}   {phi-f}   {phi} ; %
41 | 
42 |   \gate {X-gate} {(X-f)(X-f-caption)} {T}
43 | 
44 |   \plate {plate1} { %
45 |     (X)(X-gate) %
46 |     (T)(T-f)(T-f-caption) %
47 |   } {$\forall 1 \leq i \leq n_d$}; %
48 |   \plate {} { %
49 |     (plate1) %
50 |     (theta)(theta-f)(theta-f-caption) %
51 |   } {$\forall d \in \mathcal{D}$} ; %
52 |   \plate {} { %
53 |     (phi)(phi-f)(phi-f-caption) %
54 |   } {$\forall t \in \mathcal{T}$} ; %
55 | 
56 | \end{tikzpicture}
57 | %\endpgfgraphicnamed
58 | 
59 | %%% Local Variables: 
60 | %%% mode: tex-pdf
61 | %%% TeX-master: "example"
62 | %%% End: 
63 | 


--------------------------------------------------------------------------------
/network-topic-models/doc/models/model_st.tex:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bgamari/bayes-stack/020df7bb7263104fdea254e57d6c7daf7806da3e/network-topic-models/doc/models/model_st.tex


--------------------------------------------------------------------------------
/network-topic-models/doc/models/models.tex:
--------------------------------------------------------------------------------
 1 | \documentclass[a4paper]{article}
 2 | 
 3 | \usepackage{tikz}
 4 | \usetikzlibrary{bayesnet}
 5 | 
 6 | \begin{document}
 7 | 
 8 | \section{Latent dirichlet allocation}
 9 | \input{model_lda}
10 | 
11 | \section{Latent dirichlet allocation (relevance)}
12 | This model is a variant on LDA which allows arbitrary item-wise
13 | weights to be assigned. This can be useful in document relevance
14 | scoring.
15 | 
16 | \section{Shared taste model}
17 | \input{model_st}
18 |   
19 | \section{Citation influence model}
20 | \input{model_ci}
21 | 
22 | \section{Citation influence model with no topics}
23 | \input{model_ci_nt}
24 | 
25 | \end{document}
26 | 


--------------------------------------------------------------------------------
/network-topic-models/doc/models/tikzlibrarybayesnet.code.tex:
--------------------------------------------------------------------------------
  1 | % tikzlibrary.code.tex
  2 | %
  3 | % Copyright 2010-2011 by Laura Dietz
  4 | % Copyright 2012 by Jaakko Luttinen
  5 | %
  6 | % This file may be distributed and/or modified
  7 | %
  8 | % 1. under the LaTeX Project Public License and/or
  9 | % 2. under the GNU General Public License.
 10 | %
 11 | % See the files LICENSE_LPPL and LICENSE_GPL for more details.
 12 | 
 13 | % Load other libraries
 14 | \usetikzlibrary{shapes}
 15 | \usetikzlibrary{fit}
 16 | \usetikzlibrary{chains}
 17 | \usetikzlibrary{arrows}
 18 | 
 19 | % Latent node
 20 | \tikzstyle{latent} = [circle,fill=white,draw=black,inner sep=1pt,
 21 | minimum size=20pt, font=\fontsize{10}{10}\selectfont, node distance=1]
 22 | % Observed node
 23 | \tikzstyle{obs} = [latent,fill=gray!25]
 24 | % Constant node
 25 | \tikzstyle{const} = [rectangle, inner sep=0pt, node distance=1]
 26 | % Factor node
 27 | \tikzstyle{factor} = [rectangle, fill=black,minimum size=5pt, inner
 28 | sep=0pt, node distance=0.4]
 29 | % Deterministic node
 30 | \tikzstyle{det} = [latent, diamond]
 31 | 
 32 | % Plate node
 33 | \tikzstyle{plate} = [draw, rectangle, rounded corners, fit=#1]
 34 | % Invisible wrapper node
 35 | \tikzstyle{wrap} = [inner sep=0pt, fit=#1]
 36 | % Gate
 37 | \tikzstyle{gate} = [draw, rectangle, dashed, fit=#1]
 38 | 
 39 | % Caption node
 40 | \tikzstyle{caption} = [font=\footnotesize, node distance=0] %
 41 | \tikzstyle{plate caption} = [caption, node distance=0, inner sep=0pt,
 42 | below left=5pt and 0pt of #1.south east] %
 43 | \tikzstyle{factor caption} = [caption] %
 44 | \tikzstyle{every label} += [caption] %
 45 | 
 46 | \tikzset{>={triangle 45}}
 47 | 
 48 | %\pgfdeclarelayer{b}
 49 | %\pgfdeclarelayer{f}
 50 | %\pgfsetlayers{b,main,f}
 51 | 
 52 | % \factoredge [options] {inputs} {factors} {outputs}
 53 | \newcommand{\factoredge}[4][]{ %
 54 |   % Connect all nodes #2 to all nodes #4 via all factors #3.
 55 |   \foreach \f in {#3} { %
 56 |     \foreach \x in {#2} { %
 57 |       \draw[-,#1] (\x) edge[-] (\f) ; %
 58 |     } ;
 59 |     \foreach \y in {#4} { %
 60 |       \draw[->,#1] (\f) -- (\y) ; %
 61 |     } ;
 62 |   } ;
 63 | }
 64 | 
 65 | % \edge [options] {inputs} {outputs}
 66 | \newcommand{\edge}[3][]{ %
 67 |   % Connect all nodes #2 to all nodes #3.
 68 |   \foreach \x in {#2} { %
 69 |     \foreach \y in {#3} { %
 70 |       \draw[->,#1] (\x) -- (\y) ;%
 71 |     } ;
 72 |   } ;
 73 | }
 74 | 
 75 | % \factor [options] {name} {caption} {inputs} {outputs}
 76 | \newcommand{\factor}[5][]{ %
 77 |   % Draw the factor node. Use alias to allow empty names.
 78 |   \node[factor, label={[name=#2-caption]#3}, name=#2, #1,
 79 |   alias=#2-alias] {} ; %
 80 |   % Connect all inputs to outputs via this factor
 81 |   \factoredge {#4} {#2-alias} {#5} ; %
 82 | }
 83 | 
 84 | % \plate [options] {name} {fitlist} {caption}
 85 | \newcommand{\plate}[4][]{ %
 86 |   \node[wrap=#3] (#2-wrap) {}; %
 87 |   \node[plate caption=#2-wrap] (#2-caption) {#4}; %
 88 |   \node[plate=(#2-wrap)(#2-caption), #1] (#2) {}; %
 89 | }
 90 | 
 91 | % \gate [options] {name} {fitlist} {inputs}
 92 | \newcommand{\gate}[4][]{ %
 93 |   \node[gate=#3, name=#2, #1, alias=#2-alias] {}; %
 94 |   \foreach \x in {#4} { %
 95 |     \draw [-*,thick] (\x) -- (#2-alias); %
 96 |   } ;%
 97 | }
 98 | 
 99 | % \vgate {name} {fitlist-left} {caption-left} {fitlist-right}
100 | % {caption-right} {inputs}
101 | \newcommand{\vgate}[6]{ %
102 |   % Wrap the left and right parts
103 |   \node[wrap=#2] (#1-left) {}; %
104 |   \node[wrap=#4] (#1-right) {}; %
105 |   % Draw the gate
106 |   \node[gate=(#1-left)(#1-right)] (#1) {}; %
107 |   % Add captions
108 |   \node[caption, below left=of #1.north ] (#1-left-caption)
109 |   {#3}; %
110 |   \node[caption, below right=of #1.north ] (#1-right-caption)
111 |   {#5}; %
112 |   % Draw middle separation
113 |   \draw [-, dashed] (#1.north) -- (#1.south); %
114 |   % Draw inputs
115 |   \foreach \x in {#6} { %
116 |     \draw [-*,thick] (\x) -- (#1); %
117 |   } ;%
118 | }
119 | 
120 | % \hgate {name} {fitlist-top} {caption-top} {fitlist-bottom}
121 | % {caption-bottom} {inputs}
122 | \newcommand{\hgate}[6]{ %
123 |   % Wrap the left and right parts
124 |   \node[wrap=#2] (#1-top) {}; %
125 |   \node[wrap=#4] (#1-bottom) {}; %
126 |   % Draw the gate
127 |   \node[gate=(#1-top)(#1-bottom)] (#1) {}; %
128 |   % Add captions
129 |   \node[caption, above right=of #1.west ] (#1-top-caption)
130 |   {#3}; %
131 |   \node[caption, below right=of #1.west ] (#1-bottom-caption)
132 |   {#5}; %
133 |   % Draw middle separation
134 |   \draw [-, dashed] (#1.west) -- (#1.east); %
135 |   % Draw inputs
136 |   \foreach \x in {#6} { %
137 |     \draw [-*,thick] (\x) -- (#1); %
138 |   } ;%
139 | }
140 | 
141 | 


--------------------------------------------------------------------------------
/network-topic-models/network-topic-models.cabal:
--------------------------------------------------------------------------------
  1 | -- Initial bayes-stack-topic-models.cabal generated by cabal init.  For
  2 | -- further documentation, see http://haskell.org/cabal/users-guide/
  3 | 
  4 | name:                network-topic-models
  5 | version:             0.2.0.1
  6 | synopsis:            A few network topic model implementations for bayes-stack
  7 | description:         Implementations of a few network topic models build upon bayes-stack.
  8 |                      The package includes Latent Dirichlet Allocation
  9 |                      (LDA), the shared taste model, and the citation
 10 |                      influence model.
 11 | homepage:            https://github.com/bgamari/bayes-stack
 12 | license:             BSD3
 13 | license-file:        LICENSE
 14 | author:              Ben Gamari
 15 | maintainer:          bgamari.foss@gmail.com
 16 | copyright:           Copyright (c) 2012 Ben Gamari
 17 | category:            Math
 18 | build-type:          Simple
 19 | cabal-version:       >=1.8
 20 | 
 21 | source-repository head
 22 |   type:                 git
 23 |   location:             https://github.com/bgamari/bayes-stack.git
 24 | 
 25 | executable bayes-stack-lda
 26 |   main-is:             RunLDA.hs
 27 |   ghc-options:         -threaded
 28 |   build-depends:       base >=4.6 && <4.8,
 29 |                        stm >= 2.4 && < 2.5,
 30 |                        containers >= 0.5 && < 0.6,
 31 |                        deepseq >= 1.3 && < 2.0,
 32 |                        bytestring >= 0.10 && < 0.11,
 33 |                        binary >= 0.7 && < 0.8,
 34 |                        vector >=0.9 && <0.11,
 35 |                        transformers >= 0.3 && < 0.4,
 36 |                        filepath >=1.3 && < 1.4,
 37 |                        directory,
 38 |                        text >= 0.11 && < 0.12,
 39 |                        optparse-applicative >=0.5,
 40 |                        statistics ==0.10.*,
 41 |                        log-domain >= 0.6,
 42 |                        mwc-random >= 0.12 && < 0.13,
 43 |                        random-fu ==0.2.*,
 44 |                        bayes-stack >= 0.2 && < 0.3
 45 | 
 46 | executable bayes-stack-lda-rel
 47 |   main-is:             RunLDARelevance.hs
 48 |   ghc-options:         -threaded
 49 |   build-depends:       base >=4.6 && <4.8,
 50 |                        stm >= 2.4 && < 2.5,
 51 |                        containers >= 0.5 && < 0.6,
 52 |                        deepseq >= 1.3 && < 2.0,
 53 |                        bytestring >= 0.10 && < 0.11,
 54 |                        binary >= 0.7 && < 0.8,
 55 |                        vector >=0.9 && <0.11,
 56 |                        transformers >= 0.3 && < 0.4,
 57 |                        filepath >=1.3 && < 1.4,
 58 |                        directory,
 59 |                        text >= 0.11 && < 0.12,
 60 |                        attoparsec >= 0.10 && < 1.0,
 61 |                        optparse-applicative >=0.5,
 62 |                        statistics ==0.10.*,
 63 |                        log-domain >= 0.6,
 64 |                        mwc-random >= 0.12 && < 0.13,
 65 |                        random-fu ==0.2.*,
 66 |                        bayes-stack >= 0.2 && < 0.3
 67 | 
 68 | executable bayes-stack-topic-language-model
 69 |   main-is:             TopicLanguageModel.hs
 70 |   ghc-options:         -threaded
 71 |   build-depends:       base >=4.6 && <4.8,
 72 |                        stm >= 2.4 && < 2.5,
 73 |                        containers >= 0.5 && < 0.6,
 74 |                        deepseq >= 1.3 && < 2.0,
 75 |                        bytestring >= 0.10 && < 0.11,
 76 |                        binary >= 0.7 && < 0.8,
 77 |                        vector >=0.9 && <0.11,
 78 |                        transformers >= 0.3 && < 0.4,
 79 |                        filepath >=1.3 && < 1.4,
 80 |                        directory,
 81 |                        text >= 0.11 && < 0.12,
 82 |                        attoparsec >= 0.10 && < 1.0,
 83 |                        optparse-applicative >=0.5,
 84 |                        statistics ==0.10.*,
 85 |                        log-domain >= 0.6,
 86 |                        mwc-random >= 0.12 && < 0.13,
 87 |                        random-fu ==0.2.*,
 88 |                        bayes-stack >= 0.2 && < 0.3
 89 | 
 90 | executable bayes-stack-st
 91 |   main-is:             RunST.hs
 92 |   ghc-options:         -threaded
 93 |   build-depends:       base >=4.6 && <4.8,
 94 |                        stm >= 2.4 && < 2.5,
 95 |                        containers >= 0.5 && < 0.6,
 96 |                        deepseq >= 1.3 && < 2.0,
 97 |                        bytestring >= 0.10 && < 0.11,
 98 |                        binary >= 0.7 && < 0.8,
 99 |                        vector >=0.9 && <0.11,
100 |                        transformers >= 0.3 && < 0.4,
101 |                        filepath >=1.3 && < 1.4,
102 |                        directory,
103 |                        text >= 0.11 && < 0.12,
104 |                        optparse-applicative >=0.5,
105 |                        statistics ==0.10.*,
106 |                        log-domain >= 0.6,
107 |                        mwc-random >= 0.12 && < 0.13,
108 |                        random-fu ==0.2.*,
109 |                        bayes-stack >= 0.2 && < 0.3
110 | 
111 | executable bayes-stack-ci
112 |   main-is:             RunCI.hs
113 |   ghc-options:         -threaded
114 |   build-depends:       base >=4.6 && <4.8,
115 |                        stm >= 2.4 && < 2.5,
116 |                        containers >= 0.5 && < 0.6,
117 |                        deepseq >= 1.3 && < 2.0,
118 |                        bytestring >= 0.10 && < 0.11,
119 |                        binary >= 0.7 && < 0.8,
120 |                        vector >=0.9 && <0.11,
121 |                        transformers >= 0.3 && < 0.4,
122 |                        filepath >=1.3 && < 1.4,
123 |                        directory,
124 |                        text >= 0.11 && < 0.12,
125 |                        optparse-applicative >=0.5,
126 |                        statistics ==0.10.*,
127 |                        log-domain >= 0.6,
128 |                        mwc-random >= 0.12 && < 0.13,
129 |                        random-fu ==0.2.*,
130 |                        bayes-stack >= 0.2 && < 0.3
131 | 
132 | executable bayes-stack-cint
133 |   main-is:             RunCINT.hs
134 |   ghc-options:         -threaded
135 |   build-depends:       base >=4.6 && <4.8,
136 |                        stm >= 2.4 && < 2.5,
137 |                        containers >= 0.5 && < 0.6,
138 |                        deepseq >= 1.3 && < 2.0,
139 |                        bytestring >= 0.10 && < 0.11,
140 |                        binary >= 0.7 && < 0.8,
141 |                        vector >=0.9 && <0.11,
142 |                        transformers >= 0.3 && < 0.4,
143 |                        filepath >=1.3 && < 1.4,
144 |                        directory,
145 |                        text >= 0.11 && < 0.12,
146 |                        optparse-applicative >=0.5,
147 |                        statistics ==0.10.*,
148 |                        log-domain >= 0.6,
149 |                        mwc-random >= 0.12 && < 0.13,
150 |                        random-fu ==0.2.*,
151 |                        bayes-stack >= 0.2 && < 0.3,
152 |                        lens
153 | 
154 | executable bayes-stack-dump-lda
155 |   main-is:             DumpLDA.hs
156 |   build-depends:       base >=4.6 && <4.8,
157 |                        stm >= 2.4 && < 2.5,
158 |                        containers >= 0.5 && < 0.6,
159 |                        deepseq >= 1.3 && < 2.0,
160 |                        bytestring >= 0.10 && < 0.11,
161 |                        binary >= 0.7 && < 0.8,
162 |                        vector >=0.9 && <0.11,
163 |                        transformers >= 0.3 && < 0.4,
164 |                        filepath >=1.3 && < 1.4,
165 |                        directory,
166 |                        text >= 0.11 && < 0.12,
167 |                        optparse-applicative >=0.5,
168 |                        statistics ==0.10.*,
169 |                        log-domain >= 0.6,
170 |                        mwc-random >= 0.12 && < 0.13,
171 |                        random-fu ==0.2.*,
172 |                        bayes-stack >= 0.2 && < 0.3
173 | 
174 | executable bayes-stack-dump-st
175 |   main-is:             DumpST.hs
176 |   build-depends:       base >=4.6 && <4.8,
177 |                        stm >= 2.4 && < 2.5,
178 |                        containers >= 0.5 && < 0.6,
179 |                        deepseq >= 1.3 && < 2.0,
180 |                        bytestring >= 0.10 && < 0.11,
181 |                        binary >= 0.7 && < 0.8,
182 |                        vector >=0.9 && <0.11,
183 |                        transformers >= 0.3 && < 0.4,
184 |                        filepath >=1.3 && < 1.4,
185 |                        directory,
186 |                        text >= 0.11 && < 0.12,
187 |                        optparse-applicative >=0.5,
188 |                        statistics ==0.10.*,
189 |                        log-domain >= 0.6,
190 |                        mwc-random >= 0.12 && < 0.13,
191 |                        random-fu ==0.2.*,
192 |                        bayes-stack >= 0.2 && < 0.3
193 | 
194 | executable bayes-stack-dump-ci
195 |   main-is:             DumpCI.hs
196 |   build-depends:       base >=4.6 && <4.8,
197 |                        stm >= 2.4 && < 2.5,
198 |                        containers >= 0.5 && < 0.6,
199 |                        deepseq >= 1.3 && < 2.0,
200 |                        bytestring >= 0.10 && < 0.11,
201 |                        binary >= 0.7 && < 0.8,
202 |                        vector >=0.9 && <0.11,
203 |                        transformers >= 0.3 && < 0.4,
204 |                        filepath >=1.3 && < 1.4,
205 |                        directory,
206 |                        text >= 0.11 && < 0.12,
207 |                        optparse-applicative >=0.5,
208 |                        statistics ==0.10.*,
209 |                        log-domain >= 0.6,
210 |                        mwc-random >= 0.12 && < 0.13,
211 |                        random-fu ==0.2.*,
212 |                        bayes-stack >= 0.2 && < 0.3
213 | 
214 | executable bayes-stack-dump-cint
215 |   main-is:             DumpCINT.hs
216 |   build-depends:       base >=4.6 && <4.8,
217 |                        stm >= 2.4 && < 2.5,
218 |                        containers >= 0.5 && < 0.6,
219 |                        deepseq >= 1.3 && < 2.0,
220 |                        bytestring >= 0.10 && < 0.11,
221 |                        binary >= 0.7 && < 0.8,
222 |                        vector >=0.9 && <0.11,
223 |                        transformers >= 0.3 && < 0.4,
224 |                        filepath >=1.3 && < 1.4,
225 |                        directory,
226 |                        text >= 0.11 && < 0.12,
227 |                        optparse-applicative >=0.5,
228 |                        statistics ==0.10.*,
229 |                        log-domain >= 0.6,
230 |                        mwc-random >= 0.12 && < 0.13,
231 |                        random-fu ==0.2.*,
232 |                        bayes-stack >= 0.2 && < 0.3,
233 |                        lens
234 | 
235 | executable bayes-stack-dump-lda-rel
236 |   main-is:             DumpLDARelevance.hs
237 |   build-depends:       base >=4.6 && <4.8,
238 |                        stm >= 2.4 && < 2.5,
239 |                        containers >= 0.5 && < 0.6,
240 |                        deepseq >= 1.3 && < 2.0,
241 |                        bytestring >= 0.10 && < 0.11,
242 |                        binary >= 0.7 && < 0.8,
243 |                        vector >=0.9 && <0.11,
244 |                        transformers >= 0.3 && < 0.4,
245 |                        filepath >=1.3 && < 1.4,
246 |                        directory,
247 |                        text >= 0.11 && < 0.12,
248 |                        optparse-applicative >=0.5,
249 |                        statistics ==0.10.*,
250 |                        log-domain >= 0.6,
251 |                        mwc-random >= 0.12 && < 0.13,
252 |                        random-fu ==0.2.*,
253 |                        bayes-stack >= 0.2 && < 0.3
254 | 
255 | benchmark lda-benchmark
256 |   type:                exitcode-stdio-1.0
257 |   main-is:             Benchmark.hs
258 |   Ghc-Options:         -H512M -A512M -O2
259 |   build-depends:       base >=4.6 && <4.8,
260 |                        vector >=0.9 && <0.11,
261 |                        statistics ==0.10.*,
262 |                        bimap ==0.2.*,
263 |                        containers ==0.5.*,
264 |                        transformers ==0.3.*,
265 |                        bayes-stack ==0.2.*,
266 |                        text ==0.11.*,
267 |                        random-fu ==0.2.*,
268 |                        mwc-random ==0.12.*,
269 |                        log-domain >= 0.6,
270 |                        bytestring ==0.10.*,
271 |                        binary >=0.7 && < 0.8,
272 |                        deepseq ==1.3.*,
273 |                        directory,
274 |                        criterion
275 | 


--------------------------------------------------------------------------------