├── .gitignore ├── .travis.yml ├── LICENSE ├── Main.hs ├── README.md ├── default.nix ├── index.html ├── miso-from-html.cabal ├── nix └── nixpkgs.json └── shell.nix /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.hi 3 | *.o 4 | tags 5 | dist* 6 | result 7 | cabal.project* 8 | TAGS 9 | dist* 10 | result -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: nix 2 | 3 | before_install: 4 | - nix-channel --list 5 | - nix-channel --update 6 | 7 | script: 8 | - nix-build 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016-2020, David M. Johnson 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 9 | 10 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 11 | -------------------------------------------------------------------------------- /Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DerivingStrategies #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | {-# LANGUAGE FlexibleInstances #-} 4 | {-# LANGUAGE FlexibleContexts #-} 5 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} 6 | module Main where 7 | 8 | import Control.Applicative 9 | 10 | import Data.Attoparsec.Text 11 | import Data.Char 12 | import Data.List hiding (takeWhile) 13 | import Data.Map (Map) 14 | import qualified Data.Map as M 15 | import Data.Maybe 16 | import Data.Text (Text) 17 | import qualified Data.Text as T 18 | import qualified Data.Text.IO as T 19 | import Prelude hiding (takeWhile) 20 | import System.Exit 21 | import Text.Pretty.Simple 22 | 23 | data HTML 24 | = Branch TagName [ Attr ] [ HTML ] 25 | | Leaf Text 26 | deriving (Eq) 27 | 28 | newtype CSS = CSS (Map Text Text) 29 | deriving (Eq) 30 | deriving newtype (Monoid, Semigroup) 31 | 32 | instance Show CSS where 33 | show (CSS hmap) = 34 | mconcat 35 | [ "M.fromList [ " 36 | , intercalate "," (go <$> M.assocs hmap) 37 | , " ]" 38 | ] 39 | where 40 | go (k,v) = "(" <> "\"" <> 41 | T.unpack k <> "\" ," <> "\"" <> 42 | T.unpack v <> "\" )" 43 | 44 | data Attr = Attr Text (Maybe Text) 45 | deriving (Eq) 46 | 47 | instance Show HTML where 48 | show (Leaf x) = "\"" <> T.unpack x <> "\"" 49 | show (Branch t as cs) = 50 | mconcat $ 51 | [ T.unpack t 52 | , "_ " 53 | , show as 54 | ] ++ [ show cs | not (isEmpty t) ] 55 | 56 | instance Show Attr where 57 | show (Attr "style" (Just v)) = 58 | mconcat 59 | [ "style_ $ " 60 | , T.unpack v 61 | ] 62 | show (Attr k (Just v)) 63 | | T.any (=='-') k = 64 | mconcat 65 | [ "textProp \"" 66 | , T.unpack k 67 | , "\"" 68 | , " \"" 69 | , T.unpack v 70 | , "\"" 71 | ] 72 | | otherwise = 73 | mconcat 74 | [ T.unpack k 75 | , "_ " 76 | , "\"" 77 | , T.unpack v 78 | , "\"" 79 | ] 80 | show (Attr "checked" Nothing) = 81 | "checked_ True" 82 | show (Attr k Nothing) = 83 | mconcat 84 | [ "textProp \"" 85 | , T.unpack k 86 | , "\" \"\"" 87 | ] 88 | 89 | type TagName = Text 90 | 91 | tag :: Parser (TagName, [Attr]) 92 | tag = do 93 | _ <- char '<' 94 | t <- takeWhile1 isAlphaNum 95 | _ <- char '>' 96 | pure (t, []) 97 | 98 | tagWithAttrs :: Parser (TagName, [Attr]) 99 | tagWithAttrs = do 100 | _ <- char '<' 101 | t <- takeWhile1 (/=' ') 102 | _ <- char ' ' 103 | as <- attrs `sepBy` char ' ' 104 | skipSpace 105 | _ <- char '/' <|> char '>' 106 | pure (t, as) 107 | 108 | attrs :: Parser Attr 109 | attrs = kvAttr <|> attr 110 | where 111 | predicate x = isAlpha x || x == '-' 112 | kvAttr = Attr <$> key <*> do Just <$> value 113 | attr = flip Attr Nothing <$> justKey 114 | justKey = takeWhile1 predicate 115 | key = do 116 | k <- takeWhile1 predicate 117 | _ <- char '=' 118 | pure k 119 | value = do 120 | _ <- char '"' 121 | v <- takeWhile (/= '"') 122 | _ <- char '"' 123 | pure v 124 | 125 | children :: Parser [HTML] 126 | children = many htmlOrLeaf 127 | 128 | htmlOrLeaf :: Parser HTML 129 | htmlOrLeaf = html <|> leaf 130 | 131 | leaf :: Parser HTML 132 | leaf = Leaf <$> do 133 | strip . T.filter (/='\n') <$> 134 | takeWhile1 (/='<') 135 | where 136 | strip = T.reverse 137 | . T.dropWhile (==' ') 138 | . T.reverse 139 | . T.dropWhile (==' ') 140 | 141 | dropFluff :: Parser () 142 | dropFluff = do 143 | _ <- takeWhile (`elem` ("\n " :: String)) 144 | pure () 145 | 146 | html :: Parser HTML 147 | html = do 148 | (openTag, as) <- 149 | tag <|> tagWithAttrs 150 | dropFluff 151 | cs <- 152 | if isEmpty openTag 153 | then pure [] 154 | else do 155 | cs <- children 156 | closeTag 157 | pure cs 158 | dropFluff 159 | let hasStyle (Attr k _) = k == "style" 160 | pure $ case find hasStyle as of 161 | Just (Attr key (Just cssText)) -> do 162 | let parsedCss = T.pack $ show (parseCss cssText) 163 | newAttr = Attr key (Just parsedCss) 164 | oldAttrs = filter (not . hasStyle) as 165 | Branch openTag (newAttr : oldAttrs) cs 166 | _ -> 167 | Branch openTag as cs 168 | 169 | parseCss :: Text -> CSS 170 | parseCss cssText = CSS cssMap 171 | where 172 | cssMap 173 | = M.fromList 174 | [ (k,v) 175 | | [k,v] <- T.splitOn ":" <$> T.splitOn ";" cssText 176 | ] 177 | 178 | isEmpty :: Text -> Bool 179 | isEmpty = 180 | flip elem 181 | [ "area" 182 | , "base" 183 | , "br" 184 | , "col" 185 | , "embed" 186 | , "hr" 187 | , "img" 188 | , "input" 189 | , "link" 190 | , "meta" 191 | , "param" 192 | , "source" 193 | , "track" 194 | , "wbr" 195 | ] 196 | 197 | closeTag :: Parser () 198 | closeTag = do 199 | _ <- string "" 200 | _ <- takeWhile1 isAlphaNum 201 | _ <- char '>' 202 | pure () 203 | 204 | main :: IO () 205 | main = do 206 | file <- stripDoctype . removeComments <$> T.getContents 207 | case parseOnly html file of 208 | Right r -> 209 | pPrint r 210 | Left e -> do 211 | print e 212 | exitFailure 213 | 214 | -- | Layered lexer 215 | data Mode 216 | = InComment 217 | | Normal 218 | deriving (Show, Eq) 219 | 220 | stripDoctype :: Text -> Text 221 | stripDoctype t = do 222 | let firstLine = T.takeWhile (/='\n') t 223 | if "" `T.isPrefixOf` (T.toLower firstLine) 224 | then T.drop 1 (T.dropWhile (/='\n') t) 225 | else t 226 | 227 | -- | Remove HTML comments using a layered lexer 228 | -- 229 | -- @ 230 | -- > removeComments "" 231 | -- > 232 | -- @ 233 | -- 234 | removeComments :: Text -> Text 235 | removeComments = go Normal Nothing 236 | where 237 | go Normal Nothing txt = 238 | case T.uncons txt of 239 | Nothing -> 240 | mempty 241 | Just (c, next) -> 242 | T.singleton c <> 243 | go Normal (Just c) next 244 | go Normal (Just _) txt = 245 | case T.uncons txt of 246 | Nothing -> 247 | mempty 248 | Just (c,next) -> 249 | case T.uncons next of 250 | Just (c',next') -> 251 | if [c,c'] == " 256 | go Normal (Just c) next 257 | Nothing -> 258 | T.singleton c <> 259 | go Normal (Just c) next 260 | go InComment Nothing txt = 261 | case T.uncons txt of 262 | Nothing -> 263 | error "Comment not terminated" 264 | Just (c,next) -> 265 | go InComment (Just c) next 266 | go InComment (Just prev) txt = 267 | case T.uncons txt of 268 | Nothing -> 269 | error "Comment not terminated" 270 | Just (c,next) -> 271 | if [prev,c] == "->" 272 | then go Normal (Just c) next 273 | else go InComment (Just c) next 274 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | miso-from-html 2 | =================== 3 |  4 |  5 | [](https://github.com/dmjio/miso-from-html/blob/master/miso-from-html/LICENSE) 6 | [](https://travis-ci.org/dmjio/miso-from-html) 7 | 8 | Convert HTML into [miso](https://github.com/dmjio/miso) `View` syntax. 9 | 10 | ### Features 11 | - Strips comments 12 | - Pretty prints style tags as a Haskell `Map` from `Data.Map` 13 | 14 | ### Usage 15 | 16 | Given some HTML 17 | 18 | ```html 19 | 27 | ``` 28 | 29 | Convert it to [miso](https://github.com/dmjio/miso) `View` syntax. 30 | 31 | ```bash 32 | $ cabal run miso-from-html < index.html 33 | ``` 34 | 35 | Result 36 | 37 | ```haskell 38 | nav_ 39 | [ class_ "navbar" 40 | , role_ "navigation" 41 | ] 42 | [ div_ [ class_ "navbar-brand" ] 43 | [ a_ 44 | [ class_ "navbar-item" 45 | , href_ "https://bulma.io" 46 | ] 47 | [ img_ 48 | [ src_ "https://bulma.io/images/bulma-logo.png" 49 | , width_ "112" 50 | , height_ "28" 51 | ] 52 | , a_ [] 53 | [ "ok" 54 | , p_ [][ "hey" ] 55 | ] 56 | ] 57 | ] 58 | ] 59 | ``` 60 | 61 | ### Test 62 | 63 | ```bash 64 | $ nix-shell --run 'runghc Main.hs < index.html' 65 | ``` 66 | -------------------------------------------------------------------------------- /default.nix: -------------------------------------------------------------------------------- 1 | with (builtins.fromJSON (builtins.readFile ./nix/nixpkgs.json)); 2 | { pkgs ? import (builtins.fetchTarball { 3 | url = "https://github.com/NixOS/nixpkgs/archive/${rev}.tar.gz"; 4 | inherit sha256; 5 | }) { config.allowUnfree = true; } 6 | }: 7 | let 8 | app = pkgs.haskellPackages.callCabal2nix "miso-from-html" ./. {}; 9 | in 10 | { 11 | inherit app pkgs; 12 | } 13 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 |
3 | 4 |