├── .github └── FUNDING.yml ├── .gitignore ├── .replit ├── APACHE_LICENSE-2.0.txt ├── Blackjack ├── .gitignore ├── Blackjack.cabal ├── Card.hs ├── LICENSE ├── Main.hs ├── README.md ├── RandomizedList.hs ├── Table.hs └── stack.yaml ├── BraveSearch ├── BraveSearch.hs ├── Main.hs ├── README.md └── brave-search-client.cabal ├── ClientServer ├── Client.hs ├── ClientServer.cabal ├── LICENSE ├── README.md ├── Server.hs └── stack.yaml ├── CommandLineApp ├── .gitignore ├── CommandLine1.hs ├── CommandLine2.hs ├── CommandLineApp.cabal ├── GameLoop1.hs ├── GameLoop2.hs ├── LICENSE ├── README.md ├── ReadTextFile.hs ├── ReadTextFileErrorHandling.hs └── stack.yaml ├── Database-postgres ├── .gitignore ├── LICENSE ├── README.md ├── TestDatabase.cabal ├── TestPostgres1.hs └── stack.yaml ├── Database-sqlite ├── .gitignore ├── LICENSE ├── README.md ├── TestDatabase.cabal ├── TestSqLite1.hs └── stack.yaml ├── FastTag ├── FastTag.cabal ├── LICENSE ├── LexiconData.hs ├── README.md ├── fasttag.hs └── stack.yaml ├── HybridHaskellPythonCorefAnaphoraResolution ├── .gitignore ├── ChangeLog.md ├── HybridHaskellPythonCorefAnaphoraResolution.cabal ├── README.md ├── Setup.hs ├── app │ └── Main.hs ├── python_coreference_anaphora_resolution_server │ ├── Makefile │ ├── README.md │ ├── bin │ │ └── corefserver │ ├── corefserver │ │ ├── coref_server.py │ │ └── test │ │ │ └── README.md │ ├── setup.py │ └── test_client.py ├── src │ └── CorefWebClient.hs ├── stack.yaml └── test │ └── Spec.hs ├── HybridHaskellPythonNlp ├── .gitignore ├── ChangeLog.md ├── LICENSE ├── README.md ├── Setup.hs ├── app │ └── Main.hs ├── python_spacy_nlp_server │ ├── Makefile │ ├── README.md │ ├── bin │ │ └── spacynlpserver │ ├── setup.py │ ├── spacynlpserver │ │ ├── server_spacy.py │ │ └── test │ │ │ └── README.md │ └── test_client.py ├── src │ └── NlpWebClient.hs ├── stack.yaml └── test │ └── Spec.hs ├── ImPure ├── .gitignore ├── CommonWords.hs ├── DoLetExample.hs ├── DoLetExample2.hs ├── DoLetExample3.hs ├── FmapExample.hs ├── ImPure.cabal ├── README.md ├── stack.yaml ├── text1.txt └── text2.txt ├── Makefile ├── NlpTool ├── .gitignore ├── NlpTool.cabal ├── README.md ├── WebApp.hs ├── app │ └── NlpTool.hs ├── src │ └── nlp │ │ ├── Categorize.hs │ │ ├── Entities.hs │ │ ├── NlpUtils.hs │ │ ├── Sentence.hs │ │ ├── Stemmer.hs │ │ ├── Summarize.hs │ │ └── data │ │ ├── BroadcastNetworkNamesDbPedia.hs │ │ ├── Category1Gram.hs │ │ ├── Category2Gram.hs │ │ ├── CityNamesDbpedia.hs │ │ ├── CompanyNamesDbpedia.hs │ │ ├── CountryNamesDbpedia.hs │ │ ├── PeopleDbPedia.hs │ │ ├── PoliticalPartyNamesDbPedia.hs │ │ ├── TradeUnionNamesDbPedia.hs │ │ └── UniversityNamesDbPedia.hs └── stack.yaml ├── OpenAiApiClient ├── .gitignore ├── GenText.hs ├── OpenAiApiClient.cabal ├── README.md └── stack.yaml ├── Pure ├── .gitignore ├── Cases.hs ├── ChainedCalls.hs ├── Conditionals.hs ├── Guards.hs ├── IfThenElses.hs ├── LICENSE ├── LetAndWhere.hs ├── MapExamples.hs ├── MyColors.hs ├── NoIO.hs ├── Pure.cabal ├── README.md ├── Simple.hs ├── myfunc1.hs └── stack.yaml ├── README.md ├── SparqlClient ├── .gitignore ├── HttpSparqlClient.hs ├── HttpSparqlJsonClient.hs ├── JsonTest.hs ├── README.md ├── RobsExample.hs ├── SparqlClient.cabal ├── TestSparqlClient.hs └── stack.yaml ├── StateMonad ├── README.md ├── State1.hs ├── stack.yaml └── statemonad.cabal ├── TestingHaskell ├── README.md ├── Setup.hs ├── TestingHaskell.cabal ├── app │ └── Main.hs ├── src │ └── MyColors.hs ├── stack.yaml └── test │ └── Spec.hs ├── TextProcessing ├── CleanText.hs ├── README.md ├── TestAESON.hs ├── TestCSV.hs ├── TestTextJSON.hs ├── TextProcessing.cabal ├── stack.yaml └── test.csv ├── Timers ├── .gitignore ├── README.md ├── TimerTest.hs ├── Timers.cabal └── stack.yaml ├── WebScraping ├── README.md ├── TagSoupTest.hs ├── WebScraping.cabal └── stack.yaml ├── debugging ├── LICENSE ├── README.md ├── TraceTimerTest.hs ├── debugging.cabal └── stack.yaml ├── gemini_commandline ├── Main.hs ├── README.md ├── cabal.project.freeze └── gemini.cabal ├── haskell.svg ├── knowledge_graph_creator_pure ├── .gitignore ├── KGCreator.cabal ├── LICENSE ├── Makefile ├── NEO4J_NOTES.md ├── README.md ├── Setup.hs ├── TBD_FIX.md ├── app │ └── Main.hs ├── dev │ ├── Dev.hs │ └── DevSummarize.hs ├── python_utils │ └── sparql_file_query_example.py ├── src │ ├── fileutils │ │ ├── BlackBoard.hs │ │ ├── DirUtils.hs │ │ └── FileUtils.hs │ ├── sw │ │ ├── GenNeo4jCypher.hs │ │ └── GenTriples.hs │ ├── toplevel │ │ └── Apis.hs │ └── webclients │ │ ├── ClassificationWebClient.hs │ │ ├── CorefWebClient.hs │ │ └── NlpWebClient.hs ├── stack.yaml ├── stack.yaml.lock ├── test │ └── Spec.hs └── test_data │ ├── test1.meta │ ├── test1.txt │ ├── test2.meta │ ├── test2.txt │ ├── test3.meta │ └── test3.txt ├── ollama_commandline ├── Main.hs ├── README.md ├── cabal.project.freeze └── ollama-client.cabal ├── replit.nix └── webchat ├── README.md ├── cabal.project.freeze ├── gemini-chat.cabal └── main.hs /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: mark-watson # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | markwatson: # Replace with a single Patreon username 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | test.db 2 | .idea 3 | *.iml 4 | .stack-work 5 | *~ 6 | _* 7 | .DS_Store 8 | NLPdb.hsproj 9 | .vscode 10 | stack.yaml.lock 11 | out 12 | dist-newstyle 13 | *.dyn_hi 14 | *.dyn_o 15 | 16 | -------------------------------------------------------------------------------- /.replit: -------------------------------------------------------------------------------- 1 | modules = ["haskell-ghc9.4", "python-3.12"] 2 | run = "ls" 3 | 4 | [nix] 5 | channel = "stable-24_05" 6 | 7 | [deployment] 8 | run = ["sh", "-c", "ls"] 9 | -------------------------------------------------------------------------------- /Blackjack/.gitignore: -------------------------------------------------------------------------------- 1 | .stack-work 2 | -------------------------------------------------------------------------------- /Blackjack/Blackjack.cabal: -------------------------------------------------------------------------------- 1 | name: Blackjack 2 | version: 0.1.0.0 3 | synopsis: Simple project template from stack 4 | description: Please see README.md 5 | homepage: https://github.com/mark-watson/haskell_tutorial_cookbook_examples 6 | license: Apache-2.0 7 | license-file: ../APACHE_LICENSE-2.0.txt 8 | author: Mark Watson 9 | maintainer: markw@markwatson.com 10 | copyright: Copyright 2016 Mark Watson. All rights reserved 11 | category: dev 12 | build-type: Simple 13 | cabal-version: >=1.22.4.0 14 | 15 | executable Blackjack 16 | hs-source-dirs: . 17 | main-is: Main.hs 18 | default-language: Haskell2010 19 | build-depends: base >= 4.8.2.0, containers, random, lens, array, MonadRandom 20 | other-modules: Card, Table, RandomizedList 21 | 22 | -------------------------------------------------------------------------------- /Blackjack/Card.hs: -------------------------------------------------------------------------------- 1 | module Card (Card, Rank, Suit, orderedCardDeck, cardValue) where 2 | 3 | import Data.Maybe (fromMaybe) 4 | import Data.List (elemIndex) 5 | import Data.Map (fromList, lookup, keys) 6 | 7 | data Card = Card { rank :: Rank 8 | , suit :: Suit } 9 | deriving (Eq, Show) 10 | 11 | data Suit = Hearts | Diamonds | Clubs | Spades 12 | deriving (Eq, Show, Enum, Ord) 13 | 14 | data Rank = Two | Three | Four 15 | | Five | Six | Seven | Eight 16 | | Nine | Ten | Jack | Queen | King | Ace 17 | deriving (Eq, Show, Enum, Ord) 18 | 19 | rankMap = fromList [(Two,2), (Three,3), (Four,4), (Five,5), 20 | (Six,6), (Seven,7), (Eight,8), (Nine,9), 21 | (Ten,10), (Jack,10), (Queen,10), 22 | (King,10), (Ace,11)] 23 | 24 | orderedCardDeck :: [Card] 25 | orderedCardDeck = [Card rank suit | rank <- keys rankMap, 26 | suit <- [Hearts .. Clubs]] 27 | 28 | cardValue :: Card -> Int 29 | cardValue aCard = 30 | case (Data.Map.lookup (rank aCard) rankMap) of 31 | Just n -> n 32 | Nothing -> 0 -- should never happen 33 | 34 | -------------------------------------------------------------------------------- /Blackjack/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, Mark L. Watson 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 | 10 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 13 | -------------------------------------------------------------------------------- /Blackjack/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Card -- pure code 4 | import Table -- pure code 5 | import RandomizedList -- impure code 6 | 7 | printTable :: Table -> IO () 8 | printTable aTable = 9 | putStrLn $ showTable aTable 10 | 11 | randomDeck = 12 | randomizedList orderedCardDeck 13 | 14 | gameLoop :: Table -> Int -> IO b 15 | gameLoop aTable numberOfPlayers = do 16 | printTable aTable 17 | cardDeck <- randomDeck 18 | if (handOver aTable) then 19 | do 20 | putStrLn "\nHand over. State of table at the end of the game:\n" 21 | printTable aTable 22 | putStrLn "\nNewly dealt hand:\n" 23 | gameLoop (initialDeal cardDeck (scoreHands aTable) numberOfPlayers) numberOfPlayers 24 | else 25 | do 26 | putStrLn "Enter command: h)it or set bet to 10, 20, 30; any other key to stay:" 27 | command <- getLine 28 | if elem command ["10", "20", "30"] then gameLoop (setPlayerBet (read command) aTable) numberOfPlayers 29 | else 30 | if command == "h" then gameLoop (dealCards aTable [0 .. numberOfPlayers]) numberOfPlayers 31 | else gameLoop (setPlayerPasses (dealCards aTable [1 .. numberOfPlayers])) numberOfPlayers 32 | -- player stays (no new cards) 33 | 34 | main :: IO b 35 | main = do 36 | print "Start a game of Blackjack. Besides yourself, how many other players do you want at the table?" 37 | s <- getLine 38 | let num = (read s :: Int) + 1 -- player indices: 0)user, 1)dealer, and > 1 are the other players 39 | cardDeck <- randomDeck 40 | let aTable = initialDeal cardDeck (createNewTable num) num 41 | gameLoop aTable num 42 | -------------------------------------------------------------------------------- /Blackjack/README.md: -------------------------------------------------------------------------------- 1 | # Simple implementation of Blackjack game 2 | 3 | The are a few limitations in this implementation: 4 | 5 | - Aces always count as 11 points (instead of 11 or 1) 6 | - The command line user interface does not hide "down cards" beecause it is intended to show internal state of the game 7 | 8 | 9 | ## Running game 10 | 11 | stack build --exec Blackjack 12 | 13 | Notes: 14 | 15 | - Start by entering the number of players (besides the game user) at the table (a good value is 1) 16 | - In the main game loop, you can enter one of: 17 | -- 10, 20, 30: change the user's bet by typing a number 18 | -- h: hit (user gets dealt a card) and the dealer and other player hit if they have < 17 19 | -- blank line: user passes and other players and dealer keep hitting until that have > 16 or they bust (have > 21) 20 | 21 | After a user pass, just start a new game by hitting 'h', etc. 22 | 23 | ## Maintain state of game without using a State Monad 24 | 25 | Solution, I used my code for the "Game Loop" pattern from my book [Haskell Tutorial and Cookbook](https://leanpub.com/haskell-cookbook). 26 | 27 | ### Player Chips 28 | 29 | Starting value of 10, can be changed during the game. 30 | 31 | ## Data Transformations 32 | 33 | I maintain a read-only value for a Table value. Many functions take a Table and return a modified Table. 34 | 35 | 36 | ## Interactive development 37 | 38 | -- After 'stack ghci' the following is useful during development: 39 | 40 | -- let cardDeck = [Card {rank = Eight, suit = Clubs},Card {rank = Three, suit = Hearts},Card {rank = Ace, suit = Hearts},Card {rank = Six, suit = Hearts},Card {rank = Ace, suit = Clubs},Card {rank = Jack, suit = Diamonds},Card {rank = Nine, suit = Clubs},Card {rank = Two, suit = Clubs},Card {rank = Ten, suit = Hearts},Card {rank = Jack, suit = Clubs},Card {rank = Five, suit = Clubs},Card {rank = Four, suit = Diamonds},Card {rank = Queen, suit = Clubs},Card {rank = King, suit = Diamonds},Card {rank = Ace, suit = Diamonds},Card {rank = Nine, suit = Diamonds},Card {rank = Eight, suit = Hearts},Card {rank = Ten, suit = Diamonds},Card {rank = King, suit = Hearts},Card {rank = Queen, suit = Diamonds},Card {rank = Four, suit = Hearts},Card {rank = Seven, suit = Hearts},Card {rank = King, suit = Clubs},Card {rank = Ten, suit = Clubs},Card {rank = Jack, suit = Hearts},Card {rank = Three, suit = Clubs},Card {rank = Two, suit = Hearts},Card {rank = Seven, suit = Diamonds},Card {rank = Nine, suit = Hearts},Card {rank = Eight, suit = Diamonds},Card {rank = Three, suit = Diamonds},Card {rank = Four, suit = Clubs},Card {rank = Queen, suit = Hearts},Card {rank = Seven, suit = Clubs},Card {rank = Two, suit = Diamonds},Card {rank = Six, suit = Clubs},Card {rank = Five, suit = Diamonds},Card {rank = Six, suit = Diamonds},Card {rank = Five, suit = Hearts}] 41 | -- let tt = setCardDeck cardDeck (createNewTable 2) 42 | -- let tt2 = dealCardToUser tt 1 43 | -- setPlayerBet 99 tt 44 | -- changeChipStack 2 88 tt 45 | -- _chipStacks tt 46 | -------------------------------------------------------------------------------- /Blackjack/RandomizedList.hs: -------------------------------------------------------------------------------- 1 | module RandomizedList (randomizedList) where 2 | 3 | 4 | import System.Random 5 | import Data.Array.IO 6 | import Control.Monad 7 | 8 | -- NOTE: the following is copied from https://wiki.haskell.org/Random_shuffle: 9 | 10 | randomizedList :: [a] -> IO [a] 11 | randomizedList xs = do 12 | ar <- newArray n xs 13 | forM [1..n] $ \i -> do 14 | j <- randomRIO (i,n) 15 | vi <- readArray ar i 16 | vj <- readArray ar j 17 | writeArray ar j vi 18 | return vj 19 | where 20 | n = length xs 21 | newArray :: Int -> [a] -> IO (IOArray Int a) 22 | newArray n xs = newListArray (1,n) xs 23 | 24 | -------------------------------------------------------------------------------- /Blackjack/stack.yaml: -------------------------------------------------------------------------------- 1 | # For more information, see: https://github.com/commercialhaskell/stack/blob/master/doc/yaml_configuration.md 2 | 3 | resolver: lts-22.26 4 | 5 | allow-newer: false 6 | 7 | # Local packages, usually specified by relative directory name 8 | packages: 9 | - '.' 10 | 11 | # Packages to be pulled from upstream that are not in the resolver (e.g., acme-missiles-0.3) 12 | extra-deps: [] 13 | 14 | # Override default flag values for local packages and extra-deps 15 | flags: {} 16 | 17 | # Control whether we use the GHC we find on the path 18 | # system-ghc: true 19 | 20 | # Require a specific version of stack, using version ranges 21 | # require-stack-version: -any # Default 22 | # require-stack-version: >= 0.1.4.0 23 | 24 | # Override the architecture used by stack, especially useful on Windows 25 | # arch: i386 26 | # arch: x86_64 27 | 28 | # Extra directories used by stack for building 29 | # extra-include-dirs: [/path/to/dir] 30 | # extra-lib-dirs: [/path/to/dir] 31 | -------------------------------------------------------------------------------- /BraveSearch/BraveSearch.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE RecordWildCards #-} 3 | 4 | module BraveSearch 5 | ( getSearchSuggestions 6 | ) where 7 | 8 | import Network.HTTP.Simple 9 | import Data.Text.Encoding (encodeUtf8) 10 | import Data.Aeson 11 | import qualified Data.Text as T 12 | import Control.Exception (try) 13 | import Network.HTTP.Client (HttpException) 14 | import qualified Data.ByteString.Char8 as BS 15 | -- removed unused import Data.ByteString.Lazy.Char8 16 | 17 | data SearchResponse = SearchResponse 18 | { query :: QueryInfo 19 | , web :: WebResults 20 | } deriving (Show) 21 | 22 | data QueryInfo = QueryInfo 23 | { original :: T.Text 24 | } deriving (Show) 25 | 26 | data WebResults = WebResults 27 | { results :: [WebResult] 28 | } deriving (Show) 29 | 30 | data WebResult = WebResult 31 | { type_ :: T.Text 32 | , index :: Maybe Int 33 | , all :: Maybe Bool 34 | , title :: Maybe T.Text 35 | , url :: Maybe T.Text 36 | , description :: Maybe T.Text 37 | } deriving (Show) 38 | 39 | instance FromJSON SearchResponse where 40 | parseJSON = withObject "SearchResponse" $ \v -> SearchResponse 41 | <$> v .: "query" 42 | <*> v .: "web" 43 | 44 | instance FromJSON QueryInfo where 45 | parseJSON = withObject "QueryInfo" $ \v -> QueryInfo 46 | <$> v .: "original" 47 | 48 | instance FromJSON WebResults where 49 | parseJSON = withObject "WebResults" $ \v -> WebResults 50 | <$> v .: "results" 51 | 52 | instance FromJSON WebResult where 53 | parseJSON = withObject "WebResult" $ \v -> WebResult 54 | <$> v .: "type" 55 | <*> v .:? "index" 56 | <*> v .:? "all" 57 | <*> v .:? "title" 58 | <*> v .:? "url" 59 | <*> v .:? "description" 60 | 61 | -- | Perform a Brave Search with the given API key (as raw bytes) and text query. 62 | getSearchSuggestions :: BS.ByteString -> T.Text -> IO (Either T.Text [T.Text]) 63 | getSearchSuggestions apiKey query = do 64 | -- Build base request 65 | let baseUrl = "https://api.search.brave.com/res/v1/web/search" 66 | request0 <- parseRequest baseUrl 67 | -- Add query parameters (URL-encoded) and headers 68 | let request1 = setRequestQueryString 69 | [ ("q", Just $ encodeUtf8 query) 70 | , ("country", Just "US") 71 | , ("count", Just "5") 72 | ] 73 | request0 74 | request = setRequestHeader "Accept" ["application/json"] 75 | $ setRequestHeader "X-Subscription-Token" [apiKey] 76 | $ request1 77 | 78 | result <- try $ httpLBS request 79 | 80 | case result of 81 | Left e -> return . Left $ T.pack $ "Network error: " ++ show (e :: HttpException) 82 | Right response -> 83 | let status = getResponseStatusCode response 84 | in if status /= 200 85 | then return . Left $ T.pack $ "HTTP error: " ++ show status 86 | else case eitherDecode (getResponseBody response) of 87 | Left err -> return . Left $ T.pack $ "JSON parsing error: " ++ err 88 | Right SearchResponse{..} -> 89 | let originalQuery = original query 90 | webResults = results web 91 | suggestions = ("Original Query: " <> originalQuery) 92 | : map formatResult webResults 93 | in return $ Right suggestions 94 | 95 | formatResult :: WebResult -> T.Text 96 | formatResult WebResult{..} = 97 | let titleText = maybe "N/A" ("Title: " <>) title 98 | urlText = maybe "N/A" ("URL: " <>) url 99 | descText = maybe "N/A" ("Description: " <>) (fmap (T.take 100) description) 100 | in T.intercalate " | " [titleText, urlText, descText] 101 | -------------------------------------------------------------------------------- /BraveSearch/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Main where 4 | 5 | import BraveSearch (getSearchSuggestions) 6 | import qualified Data.ByteString.Char8 as BS 7 | import System.Environment (getEnv) 8 | import qualified Data.Text as T 9 | import qualified Data.Text.IO as TIO 10 | 11 | main :: IO () 12 | main = do 13 | -- Get the API key from the environment variable 14 | -- Read API key from environment and convert to ByteString 15 | apiKeyRaw <- getEnv "BRAVE_SEARCH_API_KEY" 16 | let apiKey = BS.pack apiKeyRaw 17 | 18 | -- Prompt the user for a search query 19 | TIO.putStrLn "Enter a search query:" 20 | query <- TIO.getLine 21 | 22 | -- Call the function to get search suggestions 23 | result <- getSearchSuggestions apiKey query 24 | 25 | case result of 26 | Left err -> TIO.putStrLn $ "Error: " <> err 27 | Right suggestions -> do 28 | TIO.putStrLn "Search suggestions:" 29 | mapM_ (TIO.putStrLn . ("- " <>)) suggestions 30 | -------------------------------------------------------------------------------- /BraveSearch/README.md: -------------------------------------------------------------------------------- 1 | # Brave Search APIs 2 | 3 | You need to sign up for a free or paid for account on the [Brave search page](https://brave.com/search/api/) and set an environment variable to your assigned API key: 4 | 5 | ```{line-numbers: false} 6 | export BRAVE_SEARCH_API_KEY = BSAgQ-Nc5..... 7 | ``` 8 | 9 | ## Build and run 10 | 11 | cabal build 12 | cabal run 13 | 14 | -------------------------------------------------------------------------------- /BraveSearch/brave-search-client.cabal: -------------------------------------------------------------------------------- 1 | name: brave-search-client 2 | version: 0.1.0.0 3 | build-type: Simple 4 | cabal-version: >=1.10 5 | 6 | executable brave-search-client 7 | main-is: Main.hs 8 | other-modules: BraveSearch 9 | build-depends: base >=4.7 && <5 10 | , http-conduit 11 | , aeson 12 | , text 13 | , bytestring 14 | , http-client 15 | default-language: Haskell2010 -------------------------------------------------------------------------------- /ClientServer/Client.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Main where 4 | 5 | import Control.Monad 6 | import qualified Network.Simple.TCP as T 7 | 8 | main = do 9 | T.connect "127.0.0.1" "3000" $ \(connectionSocket, remoteAddr) -> do 10 | putStrLn $ "Connection established to " ++ show remoteAddr 11 | T.send connectionSocket "test123" 12 | response <- T.recv connectionSocket 100 13 | case response of 14 | Just s -> putStrLn $ "Response: " ++ show s 15 | Nothing -> putStrLn "No response from server" 16 | 17 | -------------------------------------------------------------------------------- /ClientServer/ClientServer.cabal: -------------------------------------------------------------------------------- 1 | name: ClientServer 2 | version: 0.1.0.0 3 | synopsis: Simple project template from stack 4 | description: Please see README.md 5 | homepage: https://github.com/mark-watson/haskell_tutorial_cookbook_examples 6 | license: Apache-2.0 7 | license-file: ../APACHE_LICENSE-2.0.txt 8 | author: Mark Watson 9 | maintainer: markw@markwatson.com 10 | copyright: Copyright 2016 Mark Watson. All rights reserved 11 | category: dev 12 | build-type: Simple 13 | cabal-version: >=1.22.4.0 14 | 15 | executable Server 16 | hs-source-dirs: . 17 | main-is: Server.hs 18 | default-language: Haskell2010 19 | build-depends: base >= 4.7 && < 5, bytestring, network-simple 20 | 21 | executable Client 22 | hs-source-dirs: . 23 | main-is: Client.hs 24 | default-language: Haskell2010 25 | build-depends: base >= 4.7 && < 5, bytestring, network-simple 26 | -------------------------------------------------------------------------------- /ClientServer/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Mark Watson (c) 2016 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Mark Watson nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /ClientServer/README.md: -------------------------------------------------------------------------------- 1 | # running example code 2 | 3 | stack build 4 | 5 | Then in two terminal windows: 6 | 7 | stack exec Server 8 | stack exec Client -------------------------------------------------------------------------------- /ClientServer/Server.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Main where 4 | 5 | -- example derived from an network-simple library example 6 | 7 | import Control.Monad 8 | import qualified Data.ByteString.Char8 as B 9 | import qualified Network.Simple.TCP as T 10 | 11 | reverseStringLoop sock = do 12 | -- get a byte string wrapped as a MonadIO: 13 | mbs <- T.recv sock 4096 14 | case mbs of 15 | Just bs -> T.send sock (B.reverse bs) >> reverseStringLoop sock 16 | Nothing -> return () 17 | 18 | main :: IO () 19 | main = T.withSocketsDo $ do -- derived from library example 20 | T.listen "*" "3000" $ \(lsock, laddr) -> do 21 | putStrLn $ "Listening at " ++ show laddr 22 | forever . T.acceptFork lsock $ \(sock, addr) -> do 23 | putStrLn $ "Connection from " ++ show addr 24 | reverseStringLoop sock 25 | 26 | -------------------------------------------------------------------------------- /ClientServer/stack.yaml: -------------------------------------------------------------------------------- 1 | # For more information, see: https://github.com/commercialhaskell/stack/blob/master/doc/yaml_configuration.md 2 | 3 | resolver: lts-22.26 4 | 5 | allow-newer: false 6 | 7 | # Local packages, usually specified by relative directory name 8 | packages: 9 | - '.' 10 | 11 | # Packages to be pulled from upstream that are not in the resolver (e.g., acme-missiles-0.3) 12 | extra-deps: [] 13 | 14 | # Override default flag values for local packages and extra-deps 15 | flags: {} 16 | 17 | # Control whether we use the GHC we find on the path 18 | # system-ghc: true 19 | 20 | # Require a specific version of stack, using version ranges 21 | # require-stack-version: -any # Default 22 | # require-stack-version: >= 0.1.4.0 23 | 24 | # Override the architecture used by stack, especially useful on Windows 25 | # arch: i386 26 | # arch: x86_64 27 | 28 | # Extra directories used by stack for building 29 | # extra-include-dirs: [/path/to/dir] 30 | # extra-lib-dirs: [/path/to/dir] 31 | -------------------------------------------------------------------------------- /CommandLineApp/.gitignore: -------------------------------------------------------------------------------- 1 | temp.txt 2 | -------------------------------------------------------------------------------- /CommandLineApp/CommandLine1.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.IO 4 | import Data.Char (toUpper) 5 | 6 | main = do 7 | putStrLn "Enter a line of text for test 1:" 8 | s <- getLine 9 | putStrLn $ "As upper case:\t" ++ (map toUpper s) 10 | main 11 | 12 | -------------------------------------------------------------------------------- /CommandLineApp/CommandLine2.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.IO 4 | import Data.Char (toUpper) 5 | 6 | main = do 7 | putStrLn "Enter a line of text for test2:" 8 | s <- getLine 9 | putStrLn $ "As upper case:\t" ++ (map toUpper s) 10 | appendFile "temp.txt" $ s ++ "\n" 11 | main 12 | 13 | -------------------------------------------------------------------------------- /CommandLineApp/CommandLineApp.cabal: -------------------------------------------------------------------------------- 1 | name: CommandLineApp 2 | version: 0.1.0.0 3 | synopsis: Simple project template from stack 4 | description: Please see README.md 5 | homepage: https://github.com/mark-watson/haskell_tutorial_cookbook_examples 6 | license: Apache-2.0 7 | license-file: ../APACHE_LICENSE-2.0.txt 8 | author: Mark Watson 9 | maintainer: markw@markwatson.com 10 | copyright: Copyright 2016 Mark Watson. All rights reserved 11 | category: dev 12 | build-type: Simple 13 | cabal-version: >=1.22.4.0 14 | 15 | executable CommandLine1 16 | hs-source-dirs: . 17 | main-is: CommandLine1.hs 18 | default-language: Haskell2010 19 | build-depends: base >= 4.7 && < 5 20 | 21 | executable CommandLine2 22 | hs-source-dirs: . 23 | main-is: CommandLine2.hs 24 | default-language: Haskell2010 25 | build-depends: base >= 4.7 && < 5 26 | 27 | executable ReadTextFile 28 | hs-source-dirs: . 29 | main-is: ReadTextFile.hs 30 | default-language: Haskell2010 31 | build-depends: base >= 4.7 && < 5 32 | 33 | executable ReadTextFileErrorHandling 34 | hs-source-dirs: . 35 | main-is: ReadTextFile.hs 36 | default-language: Haskell2010 37 | build-depends: base >= 4.7 && < 5, mtl 38 | 39 | executable GameLoop1 40 | hs-source-dirs: . 41 | main-is: GameLoop1.hs 42 | default-language: Haskell2010 43 | build-depends: base >= 4.7 && < 5, time 44 | 45 | executable GameLoop2 46 | hs-source-dirs: . 47 | main-is: GameLoop2.hs 48 | default-language: Haskell2010 49 | build-depends: base >= 4.7 && < 5, random 50 | -------------------------------------------------------------------------------- /CommandLineApp/GameLoop1.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Data.Time.Clock.POSIX 4 | 5 | data GameState = GameState { numberToGuess::Integer, numTries::Integer} 6 | deriving (Show) 7 | 8 | gameLoop :: GameState -> IO GameState 9 | gameLoop gs = do 10 | print $ numberToGuess gs 11 | putStrLn "Enter a number:" 12 | s <- getLine 13 | let num = read s :: Integer 14 | if num == numberToGuess gs then 15 | return gs 16 | else gameLoop $ GameState (numberToGuess gs) ((numTries gs) + 1) 17 | 18 | main = do 19 | pTime <- round `fmap` getPOSIXTime 20 | let gameState = GameState (pTime `mod` 5) 1 21 | print "Guess a number between 1 and 4" 22 | gameLoop gameState 23 | 24 | -------------------------------------------------------------------------------- /CommandLineApp/GameLoop2.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.Random 4 | 5 | data GameState = GameState { numberToGuess::Integer, numTries::Integer} 6 | deriving (Show) 7 | 8 | gameLoop :: GameState -> IO GameState 9 | gameLoop gs = do 10 | print $ numberToGuess gs 11 | putStrLn "Enter a number:" 12 | s <- getLine 13 | let num = read s :: Integer 14 | if num == numberToGuess gs then 15 | return gs 16 | else gameLoop $ GameState (numberToGuess gs) ((numTries gs) + 1) 17 | 18 | main = do 19 | pTime <- randomRIO(1,4) 20 | let gameState = GameState pTime 1 21 | print "Guess a number between 1 and 4" 22 | gameLoop gameState 23 | -------------------------------------------------------------------------------- /CommandLineApp/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Mark Watson (c) 2016 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Mark Watson nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /CommandLineApp/README.md: -------------------------------------------------------------------------------- 1 | # Building and running the program 2 | 3 | ~~~~~~~~ 4 | stack build --exec CommandLine1 5 | ~~~~~~~~ 6 | 7 | -------------------------------------------------------------------------------- /CommandLineApp/ReadTextFile.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.IO 4 | import Control.Monad 5 | 6 | main = do 7 | entireFileAsString <- readFile "temp.txt" 8 | print entireFileAsString 9 | let allWords = words entireFileAsString 10 | print allWords -------------------------------------------------------------------------------- /CommandLineApp/ReadTextFileErrorHandling.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.IO 4 | import Control.Exception 5 | 6 | -- catchAny from Michael Snoyman's aticle: 7 | -- (https://www.schoolofhaskell.com/user/snoyberg/general-haskell/exceptions/catching-all-exceptions: 8 | catchAny :: IO a -> (SomeException -> IO a) -> IO a 9 | catchAny = Control.Exception.catch 10 | 11 | safeFileReader :: FilePath -> IO String 12 | safeFileReader fPath = do 13 | entireFileAsString <- catchAny (readFile fPath) $ \error -> do 14 | putStrLn $ "Error: " ++ show error 15 | return "" 16 | return entireFileAsString 17 | 18 | main :: IO () 19 | main = do 20 | fContents <- safeFileReader "temp.txt" 21 | print fContents 22 | print $ words fContents 23 | -------------------------------------------------------------------------------- /CommandLineApp/stack.yaml: -------------------------------------------------------------------------------- 1 | # For more information, see: https://github.com/commercialhaskell/stack/blob/master/doc/yaml_configuration.md 2 | 3 | # Specifies the GHC version and set of packages available (e.g., lts-3.5, nightly-2015-09-21, ghc-7.10.2) 4 | resolver: lts-22.26 5 | 6 | allow-newer: false 7 | 8 | # Local packages, usually specified by relative directory name 9 | packages: 10 | - '.' 11 | 12 | # Packages to be pulled from upstream that are not in the resolver (e.g., acme-missiles-0.3) 13 | extra-deps: [] 14 | 15 | # Override default flag values for local packages and extra-deps 16 | flags: {} 17 | 18 | # Control whether we use the GHC we find on the path 19 | # system-ghc: true 20 | 21 | # Require a specific version of stack, using version ranges 22 | # require-stack-version: -any # Default 23 | # require-stack-version: >= 0.1.4.0 24 | 25 | # Override the architecture used by stack, especially useful on Windows 26 | # arch: i386 27 | # arch: x86_64 28 | 29 | # Extra directories used by stack for building 30 | # extra-include-dirs: [/path/to/dir] 31 | # extra-lib-dirs: [/path/to/dir] 32 | -------------------------------------------------------------------------------- /Database-postgres/.gitignore: -------------------------------------------------------------------------------- 1 | test.db 2 | -------------------------------------------------------------------------------- /Database-postgres/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Mark Watson (c) 2015 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Mark Watson nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /Database-postgres/README.md: -------------------------------------------------------------------------------- 1 | # Database Examples 2 | 3 | ## Test database: 4 | 5 | markw=# create database haskell; 6 | CREATE DATABASE 7 | markw=# \c haskell 8 | You are now connected to database "haskell" as user "markw". 9 | 10 | create table customers (id int, name text, email text); 11 | CREATE TABLE 12 | haskell=# insert into customers values (1, 'Acme Cement', 'info@acmecement.com'); 13 | INSERT 0 1 14 | haskell=# insert into customers values (2, 'Biff Home Sales', 'info@biff.com'); 15 | INSERT 0 1 16 | haskell=# insert into customers values (3, 'My Pens', 'info@mypens.com'); 17 | 18 | markw=# \c haskell 19 | You are now connected to database "haskell" as user "markw". 20 | haskell=# \d 21 | List of relations 22 | Schema | Name | Type | Owner 23 | --------+-----------+-------+------- 24 | public | customers | table | markw 25 | public | links | table | markw 26 | public | products | table | markw 27 | (3 rows) 28 | 29 | haskell=# select * from customers; 30 | id | name | email 31 | ----+-----------------+--------------------- 32 | 1 | Acme Cement | info@acmecement.com 33 | 2 | Biff Home Sales | info@biff.com 34 | 3 | My Pens | info@mypens.com 35 | (3 rows) 36 | 37 | haskell=# select * from products; 38 | id | name | cost 39 | ----+---------------+------ 40 | 1 | Cement bag | 2.5 41 | 2 | Cheap Pen | 1.5 42 | 3 | Expensive Pen | 14.5 43 | (3 rows) 44 | 45 | haskell=# select * from links; 46 | id | customer_id | productid 47 | ----+-------------+----------- 48 | 1 | 1 | 1 49 | 2 | 3 | 2 50 | 3 | 3 | 3 51 | (3 rows) 52 | 53 | 54 | Then build and run: 55 | 56 | 57 | ```````` 58 | stack build --exec TestPostgres1 59 | ```````` 60 | 61 | -------------------------------------------------------------------------------- /Database-postgres/TestDatabase.cabal: -------------------------------------------------------------------------------- 1 | name: TestDatabase 2 | version: 0.1.0.0 3 | synopsis: Simple project template from stack 4 | description: Please see README.md 5 | homepage: https://github.com/mark-watson/haskell_tutorial_cookbook_examples 6 | license: Apache-2.0 7 | license-file: ../APACHE_LICENSE-2.0.txt 8 | author: Mark Watson 9 | maintainer: markw@markwatson.com 10 | copyright: Copyright 2016 Mark Watson. All rights reserved 11 | category: dev 12 | build-type: Simple 13 | cabal-version: >=1.22.4.0 14 | 15 | executable TestPostgres1 16 | hs-source-dirs: . 17 | main-is: TestPostgres1.hs 18 | default-language: Haskell2010 19 | build-depends: base >= 4.7 && < 5, containers, split, MissingH, postgresql-simple, HDBC 20 | -------------------------------------------------------------------------------- /Database-postgres/TestPostgres1.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Main where 4 | 5 | import Database.PostgreSQL.Simple 6 | 7 | main :: IO () 8 | main = do 9 | conn <- connect defaultConnectInfo { connectDatabase = "haskell", connectUser = "markw", connectPassword = "test1"} 10 | -- start by getting table names in database: 11 | do 12 | r <- query_ conn "SELECT name FROM customers" :: IO [(Only String)] 13 | print "names and emails in table 'customers' in database haskell:" 14 | mapM_ (print . fromOnly) r 15 | 16 | -- add a row to table 'test' and then print out the rows in table 'test': 17 | do 18 | let rows :: [(Int, String, String)] 19 | rows = [(4, "Mary Smith", "marys@acme.com")] 20 | executeMany conn "INSERT INTO customers (id, name, email) VALUES (?,?,?)" rows 21 | r2 <- query_ conn "SELECT * from customers" :: IO [(Int, String, String)] 22 | print "number of rows in table 'customers':" 23 | print (length r2) 24 | print "rows in table 'customers':" 25 | mapM_ print r2 26 | 27 | close conn 28 | -------------------------------------------------------------------------------- /Database-postgres/stack.yaml: -------------------------------------------------------------------------------- 1 | # For more information, see: https://github.com/commercialhaskell/stack/blob/master/doc/yaml_configuration.md 2 | 3 | # Specifies the GHC version and set of packages available (e.g., lts-3.5, nightly-2015-09-21, ghc-7.10.2) 4 | resolver: lts-22.26 5 | 6 | allow-newer: true 7 | 8 | # Local packages, usually specified by relative directory name 9 | packages: 10 | - '.' 11 | 12 | # Packages to be pulled from upstream that are not in the resolver (e.g., acme-missiles-0.3) 13 | extra-deps: [] 14 | 15 | # Override default flag values for local packages and extra-deps 16 | flags: {} 17 | 18 | # Control whether we use the GHC we find on the path 19 | # system-ghc: true 20 | 21 | # Require a specific version of stack, using version ranges 22 | # require-stack-version: -any # Default 23 | # require-stack-version: >= 0.1.4.0 24 | 25 | # Override the architecture used by stack, especially useful on Windows 26 | # arch: i386 27 | # arch: x86_64 28 | 29 | # Extra directories used by stack for building 30 | # extra-include-dirs: [/path/to/dir] 31 | # extra-lib-dirs: [/path/to/dir] 32 | -------------------------------------------------------------------------------- /Database-sqlite/.gitignore: -------------------------------------------------------------------------------- 1 | test.db 2 | -------------------------------------------------------------------------------- /Database-sqlite/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Mark Watson (c) 2015 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Mark Watson nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /Database-sqlite/README.md: -------------------------------------------------------------------------------- 1 | # Database Examples 2 | 3 | ## Run first sqlite example 4 | 5 | Create sqlite database: 6 | ```````` 7 | sqlite3 test.db "create table test (id integer primary key, str text);" 8 | ```````` 9 | 10 | Then build and run: 11 | 12 | 13 | ```````` 14 | stack build --exec TestSqLite1 15 | ```````` 16 | 17 | ## Run using Replit.com, Nix, Cabal 18 | 19 | sqlite3 test.db "create table test (id integer primary key, str text);" 20 | cabal build 21 | cabal run -------------------------------------------------------------------------------- /Database-sqlite/TestDatabase.cabal: -------------------------------------------------------------------------------- 1 | name: TestDatabase 2 | version: 0.1.0.0 3 | synopsis: SqLite example 4 | description: Please see README.md 5 | homepage: https://github.com/mark-watson/haskell_tutorial_cookbook_examples 6 | license: Apache-2.0 7 | license-file: ../APACHE_LICENSE-2.0.txt 8 | author: Mark Watson 9 | maintainer: markw@markwatson.com 10 | copyright: Copyright 2016 Mark Watson. All rights reserved 11 | category: dev 12 | build-type: Simple 13 | cabal-version: >=1.22.4.0 14 | 15 | executable TestSqLite1 16 | hs-source-dirs: . 17 | main-is: TestSqLite1.hs 18 | default-language: Haskell2010 19 | build-depends: base >= 4.7 && < 5, containers, split, MissingH, sqlite-simple 20 | -------------------------------------------------------------------------------- /Database-sqlite/TestSqLite1.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | module Main where 4 | 5 | import Database.SQLite.Simple 6 | 7 | {- 8 | Create sqlite database: 9 | sqlite3 test.db "create table test (id integer primary key, str text);" 10 | 11 | This example is derived from the example at github.com/nurpax/sqlite-simple 12 | -} 13 | 14 | main :: IO () 15 | main = do 16 | conn <- open "test.db" 17 | -- start by getting table names in database: 18 | do 19 | r <- query_ conn "SELECT name FROM sqlite_master WHERE type='table'" :: IO [Only String] 20 | print "Table names in database test.db:" 21 | mapM_ (print . fromOnly) r 22 | 23 | -- get the metadata for table test in test.db: 24 | do 25 | r <- query_ conn "SELECT sql FROM sqlite_master WHERE type='table' and name='test'" :: IO [Only String] 26 | print "SQL to create table 'test' in database test.db:" 27 | mapM_ (print . fromOnly) r 28 | 29 | -- add a row to table 'test' and then print out the rows in table 'test': 30 | do 31 | execute conn "INSERT INTO test (str) VALUES (?)" 32 | (Only ("test string 2" :: String)) 33 | r2 <- query_ conn "SELECT * from test" :: IO [(Int, String)] 34 | print "number of rows in table 'test':" 35 | print (length r2) 36 | print "rows in table 'test':" 37 | mapM_ print r2 38 | 39 | close conn 40 | -------------------------------------------------------------------------------- /Database-sqlite/stack.yaml: -------------------------------------------------------------------------------- 1 | # For more information, see: https://github.com/commercialhaskell/stack/blob/master/doc/yaml_configuration.md 2 | 3 | # Specifies the GHC version and set of packages available (e.g., lts-3.5, nightly-2015-09-21, ghc-7.10.2) 4 | resolver: lts-22.26 5 | 6 | allow-newer: false 7 | 8 | # Local packages, usually specified by relative directory name 9 | packages: 10 | - '.' 11 | 12 | # Packages to be pulled from upstream that are not in the resolver (e.g., acme-missiles-0.3) 13 | extra-deps: 14 | - MissingH-1.6.0.1 15 | - random-1.2.1.2 16 | - sqlite-simple-0.4.19.0 17 | - direct-sqlite-2.3.29 18 | 19 | # Override default flag values for local packages and extra-deps 20 | flags: {} 21 | 22 | # Control whether we use the GHC we find on the path 23 | # system-ghc: true 24 | 25 | # Require a specific version of stack, using version ranges 26 | # require-stack-version: -any # Default 27 | # require-stack-version: >= 0.1.4.0 28 | 29 | # Override the architecture used by stack, especially useful on Windows 30 | # arch: i386 31 | # arch: x86_64 32 | 33 | # Extra directories used by stack for building 34 | # extra-include-dirs: [/path/to/dir] 35 | # extra-lib-dirs: [/path/to/dir] 36 | -------------------------------------------------------------------------------- /FastTag/FastTag.cabal: -------------------------------------------------------------------------------- 1 | name: FastTag 2 | version: 0.1.0.0 3 | synopsis: Simple 4 | description: Please see README.md 5 | homepage: https://github.com/mark-watson/haskell_tutorial_cookbook_examples 6 | license: Apache-2.0 7 | license-file: ../APACHE_LICENSE-2.0.txt 8 | author: Mark Watson 9 | maintainer: markw@markwatson.com 10 | copyright: Copyright 2016 Mark Watson. All rights reserved 11 | category: dev 12 | build-type: Simple 13 | cabal-version: >=1.22.4.0 14 | 15 | executable fasttag 16 | hs-source-dirs: . 17 | main-is: fasttag.hs 18 | default-language: Haskell2010 19 | build-depends: base >= 4.7 && < 5, containers, strings 20 | other-modules: LexiconData 21 | -------------------------------------------------------------------------------- /FastTag/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Mark Watson (c) 2016 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Mark Watson nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /FastTag/README.md: -------------------------------------------------------------------------------- 1 | # Haskell version of FastTag 2 | 3 | Copyright 2016 Mark Watson (http://markwatson.com) 4 | 5 | Licensed under LGPL3 or Apache 2 licenses: take your pick. 6 | 7 | The lexicon file is derived from Eric Brill's data and converted to Haskell source code that creates a map used to look up words. 8 | 9 | ACKNOWLEDGMENTS: 10 | ---------------- 11 | 12 | - Eric Brill for his lexicon and trained rule set: http://www.cs.jhu.edu/~brill/ 13 | 14 | - Medpost team for their tagging lexicon: http://mmtx.nlm.nih.gov/MedPost_SKR.shtml 15 | 16 | - Brant Chee for bug reports and bug fixes to the Java version of FastTag 17 | 18 | ## Running code 19 | 20 | stack build 21 | stack exec fasttag 22 | 23 | TAG DEFINITIONS: 24 | ---------------- 25 | 26 |
27 | 28 | CC Coord Conjuncn and,but,or 29 | NN Noun, sing. or mass dog 30 | CD Cardinal number one,two 31 | NNS Noun, plural dogs 32 | DT Determiner the,some 33 | NNP Proper noun, sing. Edinburgh 34 | EX Existential there there 35 | NNPS Proper noun, plural Smiths 36 | FW Foreign Word mon dieu 37 | PDT Predeterminer all, both 38 | IN Preposition of,in,by 39 | POS Possessive ending Õs 40 | JJ Adjective big 41 | PP Personal pronoun I,you,she 42 | JJR Adj., comparative bigger 43 | PP$ Possessive pronoun my,oneÕs 44 | JJS Adj., superlative biggest 45 | RB Adverb quickly 46 | LS List item marker 1,One 47 | RBR Adverb, comparative faster 48 | MD Modal can,should 49 | RBS Adverb, superlative fastest 50 | RP Particle up,off 51 | WP$ Possessive-Wh whose 52 | SYM Symbol +,%,& 53 | WRB Wh-adverb how,where 54 | TO ÒtoÓ to 55 | $ Dollar sign $ 56 | UH Interjection oh, oops 57 | # Pound sign # 58 | VB verb, base form eat 59 | " quote " 60 | VBD verb, past tense ate 61 | VBG verb, gerund eating 62 | ( Left paren ( 63 | VBN verb, past part eaten 64 | ) Right paren ) 65 | VBP Verb, present eat 66 | , Comma , 67 | VBZ Verb, present eats 68 | . Sent-final punct . ! ? 69 | WDT Wh-determiner which,that 70 | : Mid-sent punct. : ; Ñ 71 | WP Wh pronoun who,what 72 | 73 | 74 | MEDPOST TAG DEFINITIONS: 75 | 76 | CC coordinating conjunction 77 | CS subordinating conjunction 78 | CSN comparative conjunction (than) 79 | CST complementizer (that) 80 | DB predeterminer 81 | DD determiner 82 | EX existential there 83 | GE genitive marker Õs 84 | II preposition 85 | JJ adjective 86 | JJR comparative adjective 87 | JJT superlative adjective 88 | MC number or numeric 89 | NN noun 90 | NNP proper noun 91 | NNS plural noun 92 | PN pronoun 93 | PND determiner as pronoun 94 | PNG genitive pronoun 95 | PNR relative pronoun 96 | RR adverb 97 | RRR comparative adverb 98 | RRT superlative adverb 99 | SYM symbol 100 | TO infinitive marker to 101 | VM modal 102 | VBB base be, am, are 103 | VBD past was, were 104 | VBG participle being 105 | VBI infinitive be 106 | VBN participle been 107 | VBZ 3rd pers. sing. is 108 | VDB base do 109 | VDD past did 110 | VDG participle doing 111 | VDI infinite do 112 | VDN participle done 113 | VDZ 3rd pers. sing. does 114 | VHB base have 115 | VHD past had 116 | VHG participle having 117 | VHI infinitive have 118 | VHN participle had 119 | VHZ 3rd pers. sing. has 120 | VVB base form lexical verb 121 | VVD past tense 122 | VVG present part. 123 | VVI infinitive lexical verb 124 | VVN past part. 125 | VVZ 3rd pers. sing. 126 | VVNJ prenominal past part. 127 | VVGJ prenominal present part. 128 | VVGN nominal gerund 129 | ( left parenthesis 130 | ) right parenthesis 131 | , comma 132 | . end-of-sentence period 133 | : dashes, colons 134 | ? ? right quo 135 | 136 |137 | -------------------------------------------------------------------------------- /FastTag/fasttag.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import qualified Data.Map as M 4 | import Data.Strings (strEndsWith, strStartsWith) 5 | import Data.List (isInfixOf) 6 | 7 | import LexiconData (lexicon) 8 | 9 | bigram :: [a] -> [[a]] 10 | bigram [] = [] 11 | bigram [_] = [] 12 | bigram xs = take 2 xs : bigram (tail xs) 13 | 14 | containsString word substring = isInfixOf substring word 15 | 16 | 17 | fixTags twogramList = 18 | map 19 | -- in the following inner function, [last,current] might be bound, 20 | -- for example, to [["dog","NN"],["ran","VBD"]] 21 | (\[last, current] -> 22 | -- rule 1: DT, {VBD | VBP} --> DT, NN 23 | if last !! 1 == "DT" && (current !! 1 == "VBD" || 24 | current !! 1 == "VB" || 25 | current !! 1 == "VBP") 26 | then "NN" 27 | else 28 | -- rule 2: convert a noun to a number (CD) if "." appears in the word 29 | if (current !! 1) !! 0 == 'N' && containsString (current !! 0) "." 30 | then "CD" 31 | else 32 | -- rule 3: convert a noun to a past participle if 33 | -- words.get(i) ends with "ed" 34 | if (current !! 1) !! 0 == 'N' && strEndsWith (current !! 0) "ed" 35 | then "VBN" 36 | else 37 | -- rule 4: convert any type to adverb if it ends in "ly" 38 | if strEndsWith (current !! 0) "ly" 39 | then "RB" 40 | else 41 | -- rule 5: convert a common noun (NN or NNS) to an 42 | -- adjective if it ends with "al" 43 | if strStartsWith (current !! 1) "NN" && 44 | strEndsWith (current !! 1) "al" 45 | then "JJ" 46 | else 47 | -- rule 6: convert a noun to a verb if the preceeding 48 | -- word is "would" 49 | if strStartsWith (current !! 1) "NN" && 50 | (last !! 0) == "would" -- should be case insensitive 51 | then "VB" 52 | else 53 | -- rule 7: if a word has been categorized as a 54 | -- common noun and it ends with "s", 55 | -- then set its type to plural common noun (NNS) 56 | if strStartsWith (current !! 1) "NN" && strEndsWith (current !! 0) "s" 57 | then "NNS" 58 | else 59 | -- rule 8: convert a common noun to a present 60 | -- participle verb (i.e., a gerand) 61 | if strStartsWith (current !! 1) "NN" && 62 | strEndsWith (current !! 0) "ing" 63 | then "VBG" 64 | else (current !! 1)) 65 | twogramList 66 | 67 | substitute tks = bigram $ map tagHelper tks 68 | 69 | tagHelper token = 70 | let tags = M.findWithDefault [] token lexicon in 71 | if tags == [] then [token, "NN"] else [token, tags] 72 | 73 | tag tokens = fixTags $ substitute ([""] ++ tokens) 74 | 75 | 76 | main = do 77 | let tokens = ["the", "dog", "ran", "around", "the", "tree", "while", 78 | "the", "cat", "snaked", "around", "the", "trunk", 79 | "while", "banking", "to", "the", "left"] 80 | print $ tag tokens 81 | print $ zip tokens $ tag tokens 82 | 83 | -------------------------------------------------------------------------------- /FastTag/stack.yaml: -------------------------------------------------------------------------------- 1 | # For more information, see: https://github.com/commercialhaskell/stack/blob/master/doc/yaml_configuration.md 2 | 3 | # Specifies the GHC version and set of packages available (e.g., lts-3.5, nightly-2015-09-21, ghc-7.10.2) 4 | resolver: lts-22.26 5 | 6 | allow-newer: false 7 | 8 | # Local packages, usually specified by relative directory name 9 | packages: 10 | - '.' 11 | 12 | # Packages to be pulled from upstream that are not in the resolver (e.g., acme-missiles-0.3) 13 | extra-deps: 14 | - strings-1.1 15 | 16 | 17 | # Override default flag values for local packages and extra-deps 18 | flags: {} 19 | 20 | -------------------------------------------------------------------------------- /HybridHaskellPythonCorefAnaphoraResolution/.gitignore: -------------------------------------------------------------------------------- 1 | .stack-work/ 2 | HybridHaskellPythonDeepLearning.cabal 3 | *~ 4 | 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | .idea 10 | .vscode 11 | 12 | .DS_Store 13 | 14 | *~ 15 | 16 | # C extensions 17 | *.so 18 | 19 | # Distribution / packaging 20 | .Python 21 | env/ 22 | build/ 23 | develop-eggs/ 24 | dist/ 25 | downloads/ 26 | eggs/ 27 | .eggs/ 28 | lib/ 29 | lib64/ 30 | parts/ 31 | sdist/ 32 | var/ 33 | wheels/ 34 | 35 | -------------------------------------------------------------------------------- /HybridHaskellPythonCorefAnaphoraResolution/ChangeLog.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mark-watson/haskell_tutorial_cookbook_examples/718d33078b76a496e13a9e618eab206eb92f0760/HybridHaskellPythonCorefAnaphoraResolution/ChangeLog.md -------------------------------------------------------------------------------- /HybridHaskellPythonCorefAnaphoraResolution/HybridHaskellPythonCorefAnaphoraResolution.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 1.12 2 | 3 | -- This file has been generated from package.yaml by hpack version 0.31.2. 4 | -- 5 | -- see: https://github.com/sol/hpack 6 | -- 7 | -- hash: f2bdd513eef0a939561c376956abebf1d1a11cf8ad2b963fa362ed0438ff30cd 8 | 9 | name: HybridHaskellPythonCorefAnaphoraResolution 10 | version: 0.1.0.0 11 | description: Please see the README.md 12 | homepage: https://github.com/mark-watson/haskell_tutorial_cookbook_examples 13 | author: Mark Watson 14 | copyright: 2016-2020 Mark Watson. All rights reserved. 15 | license: Apache-2.0 16 | license-file: ../APACHE_LICENSE-2.0.txt 17 | build-type: Simple 18 | extra-source-files: 19 | README.md 20 | ChangeLog.md 21 | 22 | source-repository head 23 | type: git 24 | location: https://github.com/githubuser/HybridHaskellPythonCorefAnaphoraResolution 25 | 26 | library 27 | exposed-modules: 28 | CorefWebClient 29 | other-modules: 30 | Paths_HybridHaskellPythonCorefAnaphoraResolution 31 | hs-source-dirs: 32 | src 33 | build-depends: 34 | base >=4.7 && <5, wreq, lens, bytestring, uri-encode >= 1.5.0.5 35 | default-language: Haskell2010 36 | 37 | executable HybridHaskellPythonCorefAnaphoraResolution-exe 38 | main-is: Main.hs 39 | other-modules: 40 | Paths_HybridHaskellPythonCorefAnaphoraResolution 41 | hs-source-dirs: 42 | app 43 | ghc-options: -threaded -rtsopts -with-rtsopts=-N 44 | build-depends: 45 | HybridHaskellPythonCorefAnaphoraResolution, wreq, lens, bytestring 46 | , base >=4.7 && <5, uri-encode >= 1.5.0.5 47 | default-language: Haskell2010 48 | -------------------------------------------------------------------------------- /HybridHaskellPythonCorefAnaphoraResolution/README.md: -------------------------------------------------------------------------------- 1 | # HybridHaskellPythonCorefAnaphoraResolution 2 | 3 | This project uses a the BERT model with the spaCy NP library. 4 | 5 | To run the Python server that the Haskell code in this project calls, 'cd python_coreference_anaphora_resolution_server' 6 | and follow the instructions in the README.md file. 7 | 8 | ## Running the Haskell client 9 | 10 | stack build --fast --exec HybridHaskellPythonCorefAnaphoraResolution-exe -------------------------------------------------------------------------------- /HybridHaskellPythonCorefAnaphoraResolution/Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /HybridHaskellPythonCorefAnaphoraResolution/app/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import CorefWebClient 4 | 5 | main :: IO () 6 | main = do 7 | putStrLn "Enter text (all on one line)" 8 | s <- getLine 9 | response <- corefClient s 10 | putStr "response from coreference server:\t" 11 | putStrLn $ show response 12 | main -------------------------------------------------------------------------------- /HybridHaskellPythonCorefAnaphoraResolution/python_coreference_anaphora_resolution_server/Makefile: -------------------------------------------------------------------------------- 1 | run: 2 | nohup python server_coref.py > /tmp/coref_service.txt & 3 | 4 | stop: 5 | pkill -f server_coref 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /HybridHaskellPythonCorefAnaphoraResolution/python_coreference_anaphora_resolution_server/README.md: -------------------------------------------------------------------------------- 1 | # Installation: 2 | 3 | pip install spacy==2.1.0 # see discusion in book of either using older version of spaCy or building neuralcoref from source 4 | pip install neuralcoref 5 | pip install -U spacy 6 | python -m spacy download en 7 | pip install falcon 8 | 9 | python setup.py install 10 | 11 | 12 | This installs the executable **corefserver** on your path so you can run the server from any 13 | directory using: 14 | 15 | corefserver 16 | 17 | ## If you get an error 18 | 19 | ValueError: spacy.strings.StringStore size changed, may indicate binary incompatibility. Expected 112 from C header, got 88 from PyObject 20 | 21 | Then you can either downgrade spaCy to 2.1.0, or build neuralcoref from source if you want to use a more recent version of spaCy. 22 | -------------------------------------------------------------------------------- /HybridHaskellPythonCorefAnaphoraResolution/python_coreference_anaphora_resolution_server/bin/corefserver: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Coreference resolution server example. 4 | A simple server serving the coreference system. 5 | 6 | This file is copied and modified from an example 7 | program from https://github.com/huggingface/neuralcoref 8 | 9 | """ 10 | from __future__ import unicode_literals 11 | from __future__ import print_function 12 | 13 | from corefserver.coref_server import AllResource 14 | from wsgiref.simple_server import make_server 15 | import falcon 16 | 17 | if __name__ == '__main__': 18 | RESSOURCE = AllResource() 19 | APP = falcon.API() 20 | APP.add_route('/', RESSOURCE) 21 | HTTPD = make_server('0.0.0.0', 8000, APP) 22 | HTTPD.serve_forever() 23 | -------------------------------------------------------------------------------- /HybridHaskellPythonCorefAnaphoraResolution/python_coreference_anaphora_resolution_server/corefserver/coref_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Coreference resolution server example. 4 | A simple server serving the coreference system. 5 | 6 | This file is copied and modified from an example 7 | program from https://github.com/huggingface/neuralcoref 8 | 9 | """ 10 | from __future__ import unicode_literals 11 | from __future__ import print_function 12 | 13 | import json 14 | from wsgiref.simple_server import make_server 15 | import falcon 16 | import spacy 17 | 18 | unicode_ = str # Python 3 19 | 20 | 21 | class AllResource(object): 22 | def __init__(self): 23 | #self.nlp = spacy.load('en_coref_sm') 24 | # MLW: 25 | self.nlp = spacy.load('en') 26 | import neuralcoref 27 | neuralcoref.add_to_pipe(self.nlp) 28 | print("Server loaded") 29 | self.response = None 30 | 31 | def on_get(self, req, resp): 32 | self.response = {} 33 | 34 | text_param = req.get_param("text") 35 | no_detail = req.get_param("no_detail") 36 | if text_param is not None: 37 | 38 | text = ",".join(text_param) if isinstance(text_param, list) else text_param 39 | text = unicode_(text) 40 | text = text.replace("%20", " ").replace("%3B", ";").replace("%2C", ",").replace("%3A", ":").replace("%24","$").replace("%2C",",") 41 | print("** text=", text) 42 | doc = self.nlp(text) 43 | #print("** doc=", doc) 44 | if no_detail != None: 45 | self.response = doc._.coref_resolved 46 | resp.body = self.response 47 | resp.content_type = 'application/text' 48 | else: 49 | if doc._.has_coref: 50 | mentions = [{'start': mention.start_char, 51 | 'end': mention.end_char, 52 | 'text': mention.text, 53 | 'resolved': cluster.main.text 54 | } 55 | for cluster in doc._.coref_clusters 56 | for mention in cluster.mentions] 57 | clusters = list(list(span.text for span in cluster) 58 | for cluster in doc._.coref_clusters) 59 | resolved = doc._.coref_resolved 60 | self.response['mentions'] = mentions 61 | self.response['clusters'] = clusters 62 | self.response['resolved'] = resolved 63 | 64 | resp.body = json.dumps(self.response) 65 | resp.content_type = 'application/json' 66 | resp.append_header('Access-Control-Allow-Origin', "*") 67 | resp.status = falcon.HTTP_200 68 | 69 | if __name__ == '__main__': 70 | RESSOURCE = AllResource() 71 | APP = falcon.API() 72 | APP.add_route('/', RESSOURCE) 73 | HTTPD = make_server('0.0.0.0', 8000, APP) 74 | HTTPD.serve_forever() 75 | -------------------------------------------------------------------------------- /HybridHaskellPythonCorefAnaphoraResolution/python_coreference_anaphora_resolution_server/corefserver/test/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mark-watson/haskell_tutorial_cookbook_examples/718d33078b76a496e13a9e618eab206eb92f0760/HybridHaskellPythonCorefAnaphoraResolution/python_coreference_anaphora_resolution_server/corefserver/test/README.md -------------------------------------------------------------------------------- /HybridHaskellPythonCorefAnaphoraResolution/python_coreference_anaphora_resolution_server/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | setup(name='coref_server', 4 | version='0.11', 5 | packages=['corefserver', 'corefserver.test'], 6 | license='Apache 2', 7 | scripts=['bin/corefserver'], 8 | long_description=open('README.md').read()) 9 | -------------------------------------------------------------------------------- /HybridHaskellPythonCorefAnaphoraResolution/python_coreference_anaphora_resolution_server/test_client.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from urllib.request import Request, urlopen 4 | import urllib 5 | 6 | base_uri = 'http://127.0.0.1:8000?text=' 7 | 8 | 9 | def coref(text, no_detail=False): 10 | def get_raw_data_from_web(a_uri): 11 | req = Request(a_uri, headers={'User-Agent': 'PythonBook/1.0'}) 12 | http_response = urlopen(req) 13 | data = http_response.read() 14 | return data 15 | 16 | encoded_text = urllib.parse.quote(text, safe='') 17 | if no_detail: 18 | z = '&no_detail=1' 19 | else: 20 | z = '' 21 | raw_data = get_raw_data_from_web(base_uri + encoded_text + z) 22 | return raw_data.decode("UTF8") 23 | 24 | 25 | print(coref('My sister has a dog named Sam. She loves him')) 26 | print(coref('My sister has a dog named Sam. She loves him', no_detail=True)) 27 | -------------------------------------------------------------------------------- /HybridHaskellPythonCorefAnaphoraResolution/src/CorefWebClient.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | -- reference: http://www.serpentine.com/wreq/tutorial.html 4 | module CorefWebClient 5 | ( corefClient 6 | ) where 7 | 8 | import Control.Lens 9 | import Data.ByteString.Lazy.Char8 (unpack) 10 | import Data.Maybe (fromJust) 11 | import Network.URI.Encode (encode) 12 | import Network.Wreq 13 | 14 | base_url = "http://127.0.0.1:8000?text=" 15 | 16 | corefClient :: [Char] -> IO [Char] 17 | corefClient query = do 18 | putStrLn $ "\n\n*** Processing " ++ (encode query) 19 | r <- get $ base_url ++ (encode query) ++ "&no_detail=1" 20 | putStrLn $ "status code: " ++ (show (r ^. responseStatus . statusCode)) 21 | putStrLn $ "content type: " ++ (show (r ^? responseHeader "Content-Type")) 22 | putStrLn $ "response body: " ++ (unpack (fromJust (r ^? responseBody))) 23 | return $ unpack (fromJust (r ^? responseBody)) 24 | -------------------------------------------------------------------------------- /HybridHaskellPythonCorefAnaphoraResolution/stack.yaml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by 'stack init' 2 | # 3 | # Some commonly used options have been documented as comments in this file. 4 | # For advanced use and comprehensive documentation of the format, please see: 5 | # https://docs.haskellstack.org/en/stable/yaml_configuration/ 6 | 7 | # Resolver to choose a 'specific' stackage snapshot or a compiler version. 8 | # A snapshot resolver dictates the compiler version and the set of packages 9 | # to be used for project dependencies. For example: 10 | # 11 | resolver: lts-22.26 12 | 13 | allow-newer: true 14 | 15 | # User packages to be built. 16 | # Various formats can be used as shown in the example below. 17 | # 18 | # packages: 19 | # - some-directory 20 | # - https://example.com/foo/bar/baz-0.0.2.tar.gz 21 | # subdirs: 22 | # - auto-update 23 | # - wai 24 | packages: 25 | - . 26 | # Dependency packages to be pulled from upstream that are not in the resolver. 27 | # These entries can reference officially published versions as well as 28 | # forks / in-progress versions pinned to a git hash. For example: 29 | # 30 | # extra-deps: 31 | # - acme-missiles-0.3 32 | # - git: https://github.com/commercialhaskell/stack.git 33 | # commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a 34 | # 35 | # extra-deps: [] 36 | 37 | # Override default flag values for local packages and extra-deps 38 | # flags: {} 39 | 40 | # Extra package databases containing global packages 41 | # extra-package-dbs: [] 42 | 43 | # Control whether we use the GHC we find on the path 44 | # system-ghc: true 45 | # 46 | # Require a specific version of stack, using version ranges 47 | # require-stack-version: -any # Default 48 | # require-stack-version: ">=2.1" 49 | # 50 | # Override the architecture used by stack, especially useful on Windows 51 | # arch: i386 52 | # arch: x86_64 53 | # 54 | # Extra directories used by stack for building 55 | # extra-include-dirs: [/path/to/dir] 56 | # extra-lib-dirs: [/path/to/dir] 57 | # 58 | # Allow a newer minor version of GHC than the snapshot specifies 59 | # compiler-check: newer-minor 60 | -------------------------------------------------------------------------------- /HybridHaskellPythonCorefAnaphoraResolution/test/Spec.hs: -------------------------------------------------------------------------------- 1 | main :: IO () 2 | main = putStrLn "Test suite not yet implemented" 3 | -------------------------------------------------------------------------------- /HybridHaskellPythonNlp/.gitignore: -------------------------------------------------------------------------------- 1 | .stack-work/ 2 | HybridHaskellPythonNlp.cabal 3 | *~ 4 | 5 | 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | .idea 11 | .vscode 12 | 13 | .DS_Store 14 | 15 | *~ 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | env/ 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | wheels/ 35 | 36 | -------------------------------------------------------------------------------- /HybridHaskellPythonNlp/ChangeLog.md: -------------------------------------------------------------------------------- 1 | # Changelog for HybridHaskellPythonNlp 2 | 3 | ## Unreleased changes 4 | -------------------------------------------------------------------------------- /HybridHaskellPythonNlp/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Mark Watson (c) 2019 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Author name here nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /HybridHaskellPythonNlp/README.md: -------------------------------------------------------------------------------- 1 | # HybridHaskellPythonNlp 2 | 3 | ## Start the Python Server 4 | 5 | I assume that it is installed (see the book for directions) or see the readme file in the subdirectory python_spacy_nlp_server 6 | 7 | 8 | ## Running the Haskell client 9 | 10 | stack build --fast --exec HybridHaskellPythonNlp-exe 11 | 12 | -------------------------------------------------------------------------------- /HybridHaskellPythonNlp/Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /HybridHaskellPythonNlp/app/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import NlpWebClient 4 | 5 | main :: IO () 6 | main = do 7 | putStrLn "Enter text (all on one line)" 8 | s <- getLine 9 | response <- (nlpClient s) :: IO NlpResponse 10 | putStr "response from NLP server:\n" 11 | putStrLn $ show response 12 | main -------------------------------------------------------------------------------- /HybridHaskellPythonNlp/python_spacy_nlp_server/Makefile: -------------------------------------------------------------------------------- 1 | run: 2 | nohup python server_spacy.py > /tmp/AISspacy.txt & 3 | 4 | stop: 5 | pkill -f server_spacy 6 | -------------------------------------------------------------------------------- /HybridHaskellPythonNlp/python_spacy_nlp_server/README.md: -------------------------------------------------------------------------------- 1 | # Installation: 2 | 3 | pip install -U spacy 4 | python -m spacy download en 5 | pip install falcon 6 | 7 | python setup.py install 8 | 9 | bin/spacynlpserver 10 | -------------------------------------------------------------------------------- /HybridHaskellPythonNlp/python_spacy_nlp_server/bin/spacynlpserver: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Coreference resolution server example. 4 | A simple server serving the coreference system. 5 | 6 | This file is copied and modified from an example 7 | program from https://github.com/huggingface/neuralcoref 8 | 9 | """ 10 | from __future__ import unicode_literals 11 | from __future__ import print_function 12 | 13 | from spacynlpserver.server_spacy import AllResource 14 | 15 | import json 16 | from wsgiref.simple_server import make_server 17 | import falcon 18 | 19 | if __name__ == '__main__': 20 | RESSOURCE = AllResource() 21 | APP = falcon.API() 22 | APP.add_route('/', RESSOURCE) 23 | HTTPD = make_server('0.0.0.0', 8008, APP) 24 | HTTPD.serve_forever() 25 | -------------------------------------------------------------------------------- /HybridHaskellPythonNlp/python_spacy_nlp_server/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | setup(name='spacy_nlp_server', 4 | version='0.11', 5 | packages=['spacynlpserver', 'spacynlpserver.test'], 6 | license='Apache 2', 7 | scripts=['bin/spacynlpserver'], 8 | long_description=open('README.md').read()) 9 | -------------------------------------------------------------------------------- /HybridHaskellPythonNlp/python_spacy_nlp_server/spacynlpserver/server_spacy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Coreference resolution server example. 4 | A simple server serving the coreference system. 5 | 6 | This file is copied and modified from an example 7 | program from https://github.com/huggingface/neuralcoref 8 | 9 | """ 10 | from __future__ import unicode_literals 11 | from __future__ import print_function 12 | 13 | import json 14 | from wsgiref.simple_server import make_server 15 | import falcon 16 | import spacy 17 | 18 | unicode_ = str # Python 3 19 | 20 | class AllResource(object): 21 | def __init__(self): 22 | self.nlp = spacy.load('en') 23 | print("Server loaded") 24 | self.response = None 25 | 26 | def on_get(self, req, resp): 27 | self.response = {} 28 | 29 | text_param = req.get_param("text") 30 | if text_param is not None: 31 | 32 | text = ",".join(text_param) if isinstance(text_param, list) else text_param 33 | text = unicode_(text) 34 | text = text.replace("%20", " ").replace("%3B", ";").replace("%2C", ",").replace("%3A", ":").replace("%24","$").replace("%2C",",") 35 | print("** text=", text) 36 | doc = self.nlp(text) 37 | #print("** doc=", doc) 38 | self.response['entities'] = [ent.text + "/" + ent.label_ for ent in doc.ents] 39 | self.response['tokens'] = [token.text for token in doc] 40 | 41 | resp.body = json.dumps(self.response) 42 | resp.content_type = 'application/json' 43 | resp.append_header('Access-Control-Allow-Origin', "*") 44 | resp.status = falcon.HTTP_200 45 | 46 | if __name__ == '__main__': 47 | RESSOURCE = AllResource() 48 | APP = falcon.API() 49 | APP.add_route('/', RESSOURCE) 50 | HTTPD = make_server('0.0.0.0', 8008, APP) 51 | HTTPD.serve_forever() 52 | -------------------------------------------------------------------------------- /HybridHaskellPythonNlp/python_spacy_nlp_server/spacynlpserver/test/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mark-watson/haskell_tutorial_cookbook_examples/718d33078b76a496e13a9e618eab206eb92f0760/HybridHaskellPythonNlp/python_spacy_nlp_server/spacynlpserver/test/README.md -------------------------------------------------------------------------------- /HybridHaskellPythonNlp/python_spacy_nlp_server/test_client.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from urllib.request import Request, urlopen 4 | import urllib 5 | import json 6 | 7 | base_uri = 'http://127.0.0.1:8008?text=' 8 | 9 | 10 | def spacy_client(text, no_detail=False): 11 | def get_raw_data_from_web(a_uri): 12 | req = Request(a_uri, headers={'User-Agent': 'PythonBook/1.0'}) 13 | http_response = urlopen(req) 14 | data = http_response.read() 15 | return data 16 | 17 | encoded_text = urllib.parse.quote(text) 18 | raw_data = get_raw_data_from_web(base_uri + encoded_text) 19 | return json.loads(raw_data.decode("UTF8")) 20 | 21 | 22 | print(spacy_client('Bill Clinton went to the Pepsi factory in Mexico.')) 23 | print(spacy_client('The Mark Jones ran. Sam Bell ran faster.')) 24 | -------------------------------------------------------------------------------- /HybridHaskellPythonNlp/src/NlpWebClient.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE DeriveDataTypeable #-} 3 | 4 | -- reference: http://www.serpentine.com/wreq/tutorial.html 5 | module NlpWebClient 6 | ( nlpClient, NlpResponse 7 | ) where 8 | 9 | import Control.Lens 10 | import Data.ByteString.Lazy.Char8 (unpack) 11 | import Data.Maybe (fromJust) 12 | import Network.URI.Encode as E -- encode is also in Data.Aeson 13 | import Network.Wreq 14 | 15 | import Text.JSON.Generic 16 | 17 | data NlpResponse = NlpResponse {entities::[String], tokens::[String]} deriving (Show, Data, Typeable) 18 | 19 | base_url = "http://127.0.0.1:8008?text=" 20 | 21 | nlpClient :: [Char] -> IO NlpResponse 22 | nlpClient query = do 23 | putStrLn $ "\n\n*** Processing " ++ query 24 | r <- get $ base_url ++ (E.encode query) ++ "&no_detail=1" 25 | --putStrLn $ "status code: " ++ (show (r ^. responseStatus . statusCode)) 26 | --putStrLn $ "content type: " ++ (show (r ^? responseHeader "Content-Type")) 27 | --putStrLn $ "response body: " ++ (unpack (fromJust (r ^? responseBody))) 28 | let ret = (decodeJSON (unpack (fromJust (r ^? responseBody)))) :: NlpResponse 29 | return ret 30 | -------------------------------------------------------------------------------- /HybridHaskellPythonNlp/stack.yaml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by 'stack init' 2 | # 3 | # Some commonly used options have been documented as comments in this file. 4 | # For advanced use and comprehensive documentation of the format, please see: 5 | # https://docs.haskellstack.org/en/stable/yaml_configuration/ 6 | 7 | # Resolver to choose a 'specific' stackage snapshot or a compiler version. 8 | # A snapshot resolver dictates the compiler version and the set of packages 9 | # to be used for project dependencies. For example: 10 | # 11 | resolver: lts-22.26 12 | 13 | allow-newer: true 14 | 15 | # User packages to be built. 16 | # Various formats can be used as shown in the example below. 17 | # 18 | # packages: 19 | # - some-directory 20 | # - https://example.com/foo/bar/baz-0.0.2.tar.gz 21 | # subdirs: 22 | # - auto-update 23 | # - wai 24 | packages: 25 | - . 26 | extra-deps: 27 | - json-0.10 28 | # Dependency packages to be pulled from upstream that are not in the resolver. 29 | # These entries can reference officially published versions as well as 30 | # forks / in-progress versions pinned to a git hash. For example: 31 | # 32 | # extra-deps: 33 | # - acme-missiles-0.3 34 | # - git: https://github.com/commercialhaskell/stack.git 35 | # commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a 36 | # 37 | # extra-deps: [] 38 | 39 | # Override default flag values for local packages and extra-deps 40 | # flags: {} 41 | 42 | # Extra package databases containing global packages 43 | # extra-package-dbs: [] 44 | 45 | # Control whether we use the GHC we find on the path 46 | # system-ghc: true 47 | # 48 | # Require a specific version of stack, using version ranges 49 | # require-stack-version: -any # Default 50 | # require-stack-version: ">=2.1" 51 | # 52 | # Override the architecture used by stack, especially useful on Windows 53 | # arch: i386 54 | # arch: x86_64 55 | # 56 | # Extra directories used by stack for building 57 | # extra-include-dirs: [/path/to/dir] 58 | # extra-lib-dirs: [/path/to/dir] 59 | # 60 | # Allow a newer minor version of GHC than the snapshot specifies 61 | # compiler-check: newer-minor 62 | -------------------------------------------------------------------------------- /HybridHaskellPythonNlp/test/Spec.hs: -------------------------------------------------------------------------------- 1 | main :: IO () 2 | main = putStrLn "Test suite not yet implemented" 3 | -------------------------------------------------------------------------------- /ImPure/.gitignore: -------------------------------------------------------------------------------- 1 | temp.txt 2 | -------------------------------------------------------------------------------- /ImPure/CommonWords.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Data.Set (fromList, toList, intersection) 4 | import Data.Char (toLower) 5 | 6 | fileToWords fileName = do 7 | fileText <- readFile fileName 8 | return $ (fromList . words) (map toLower fileText) 9 | 10 | commonWords file1 file2 = do 11 | words1 <- fileToWords file1 12 | words2 <- fileToWords file2 13 | return $ toList $ intersection words1 words2 14 | 15 | commonWords2 file1 file2 = 16 | fileToWords file1 >>= \f1 -> 17 | fileToWords file2 >>= \f2 -> 18 | return $ toList $ intersection f1 f2 19 | 20 | commonWords3 file1 file2 = 21 | (\f1 f2 -> toList $ intersection f1 f2) 22 | <$> fileToWords file1 23 | <*> fileToWords file2 24 | 25 | main = do 26 | cw <- commonWords "text1.txt" "text2.txt" 27 | print cw 28 | cw2 <- commonWords2 "text1.txt" "text2.txt" 29 | print cw2 30 | cw3 <- commonWords3 "text1.txt" "text2.txt" 31 | print cw3 32 | 33 | -------------------------------------------------------------------------------- /ImPure/DoLetExample.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | example1 = do -- good style 4 | putStrLn "Enter an integer number:" 5 | s <- getLine 6 | let number = (read s :: Int) + 2 7 | putStrLn $ "Number plus 2 = " ++ (show number) 8 | 9 | example2 = do -- avoid using "in" inside a do statement 10 | putStrLn "Enter an integer number:" 11 | s <- getLine 12 | let number = (read s :: Int) + 2 in 13 | putStrLn $ "Number plus 2 = " ++ (show number) 14 | 15 | example3 = do -- avoid using "in" inside a do statement 16 | putStrLn "Enter an integer number:" 17 | s <- getLine 18 | let number = (read s :: Int) + 2 in 19 | do 20 | putStrLn "Result is:" 21 | putStrLn $ "Number plus 2 = " ++ (show number) 22 | 23 | main = do 24 | example1 25 | example2 26 | example3 -------------------------------------------------------------------------------- /ImPure/DoLetExample2.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | example1 = do 4 | putStrLn "Enter an integer number:" 5 | s <- getLine 6 | let number = (read s :: Int) + 2 7 | putStrLn $ "Number plus 2 = " ++ (show number) 8 | 9 | example2 = do 10 | putStrLn "Enter an integer number:" 11 | s <- getLine 12 | let number = (read s :: Int) + 2 in 13 | putStrLn $ "Number plus 2 = " ++ (show number) 14 | 15 | main = example1 >> example2 >> example1 -------------------------------------------------------------------------------- /ImPure/DoLetExample3.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | example3 = putStrLn "Enter an integer number:" >> getLine 4 | 5 | example4 mv = do 6 | let number = (read mv :: Int) + 2 7 | putStrLn $ "Number plus 2 = " ++ (show number) 8 | 9 | main = example3 >>= example4 10 | -------------------------------------------------------------------------------- /ImPure/FmapExample.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | fileToWords fileName = do 4 | fileText <- readFile fileName 5 | return $ words fileText 6 | 7 | main = do 8 | words1 <- fileToWords "text1.txt" 9 | print $ reverse words1 10 | words2 <- fmap reverse $ fileToWords "text1.txt" 11 | print words2 12 | 13 | -------------------------------------------------------------------------------- /ImPure/ImPure.cabal: -------------------------------------------------------------------------------- 1 | name: ImPure 2 | version: 0.1.0.0 3 | synopsis: ImPure 4 | description: Please see README.md 5 | homepage: https://github.com/mark-watson/haskell_tutorial_cookbook_examples 6 | license: Apache-2.0 7 | license-file: ../APACHE_LICENSE-2.0.txt 8 | author: Mark Watson 9 | maintainer: markw@markwatson.com 10 | copyright: Copyright 2016 Mark Watson. All rights reserved 11 | category: dev 12 | build-type: Simple 13 | cabal-version: 1.22.4.0 14 | 15 | executable DoLetExample 16 | hs-source-dirs: . 17 | main-is: DoLetExample.hs 18 | default-language: Haskell2010 19 | build-depends: base >= 4.7 && < 5 20 | 21 | executable DoLetExample2 22 | hs-source-dirs: . 23 | main-is: DoLetExample2.hs 24 | default-language: Haskell2010 25 | build-depends: base >= 4.7 && < 5 26 | 27 | executable DoLetExample3 28 | hs-source-dirs: . 29 | main-is: DoLetExample3.hs 30 | default-language: Haskell2010 31 | build-depends: base >= 4.7 && < 5 32 | 33 | executable FmapExample 34 | hs-source-dirs: . 35 | main-is: FmapExample.hs 36 | default-language: Haskell2010 37 | build-depends: base >= 4.7 && < 5 38 | 39 | executable CommonWords 40 | hs-source-dirs: . 41 | main-is: CommonWords.hs 42 | default-language: Haskell2010 43 | build-depends: base >= 4.7 && < 5, containers 44 | 45 | 46 | -------------------------------------------------------------------------------- /ImPure/README.md: -------------------------------------------------------------------------------- 1 | # Running the program snippets from the Pure Haskell Tutorial Chapter 2 | 3 | You can run the examples interactively: 4 | 5 | ~~~~~~~~ 6 | stack ghci 7 | ~~~~~~~~ 8 | 9 | or build and run them: 10 | 11 | ~~~~~~~~ 12 | stack build 13 | stack exec CommonWords 14 | stack exec DoLetExample 15 | stack exec DoLetExample2 16 | stack exec DoLetExample3 17 | stack exec FmapExample 18 | ~~~~~~~~ 19 | 20 | 21 | These simple examples are some of the code used in the tutorial chapter on "impure" Haskell. 22 | 23 | 24 | -------------------------------------------------------------------------------- /ImPure/stack.yaml: -------------------------------------------------------------------------------- 1 | # Read the resolvr version number from ~/.stack/config.yaml 2 | 3 | resolver: lts-22.26 4 | 5 | allow-newer: false 6 | 7 | # Local packages, usually specified by relative directory name 8 | packages: 9 | - '.' 10 | 11 | # Packages to be pulled from upstream that are not in the resolver (e.g., acme-missiles-0.3) 12 | extra-deps: [] 13 | 14 | # Override default flag values for local packages and extra-deps 15 | flags: {} 16 | 17 | # Control whether we use the GHC we find on the path 18 | # system-ghc: true 19 | 20 | # Require a specific version of stack, using version ranges 21 | # require-stack-version: -any # Default 22 | # require-stack-version: >= 0.1.4.0 23 | 24 | # Override the architecture used by stack, especially useful on Windows 25 | # arch: i386 26 | # arch: x86_64 27 | 28 | # Extra directories used by stack for building 29 | # extra-include-dirs: [/path/to/dir] 30 | # extra-lib-dirs: [/path/to/dir] 31 | -------------------------------------------------------------------------------- /ImPure/text1.txt: -------------------------------------------------------------------------------- 1 | The dog chased the cat down the street. 2 | 3 | -------------------------------------------------------------------------------- /ImPure/text2.txt: -------------------------------------------------------------------------------- 1 | The dog and cat were friends. 2 | 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | cd Blackjack; stack build 3 | cd ClientServer; stack build 4 | cd CommandLineApp; stack build 5 | echo "skipping building Database-postgres" 6 | cd Database-sqlite; stack build 7 | cd debugging; stack build 8 | cd ImPure; stack build 9 | cd NlpTool; stack build 10 | cd OpenAiApiClient; stack build 11 | cd knowledge_graph_creator_pure; stack build 12 | cd Pure; stack build 13 | cd SparqlClient; stack build 14 | cd StateMonad; stack build 15 | cd TextProcessing; stack build 16 | cd Timers; stack build 17 | cd WebScraping; cabal build 18 | echo "There may be problems compiling the getenerated LexData source files in LexiconData.hs:" 19 | cd FastTag; stack build 20 | 21 | clean: 22 | rm -r -f */.stack-work */dist-newstyle */*.dyn* 23 | 24 | update_stack_resolver_macOs: 25 | sed -i '' 's/^resolver: .*/resolver: lts-22.26/' */stack.yaml 26 | 27 | update_stack_resolver_linux: 28 | sed -i 's/^resolver: .*/resolver: lts-22.26/' */stack.yaml 29 | 30 | -------------------------------------------------------------------------------- /NlpTool/.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | cabal-dev 3 | .cabal-sandbox 4 | *.o 5 | *.hi 6 | *.chi 7 | *.chs.h 8 | .virtualenv 9 | .hsenv 10 | .cabal-sandbox/ 11 | cabal.sandbox.config 12 | *.aes 13 | .stack-work 14 | -------------------------------------------------------------------------------- /NlpTool/NlpTool.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 1.12 2 | 3 | name: NlpTool 4 | version: 0.1.0.0 5 | description: Please see the README.md 6 | homepage: https://github.com/mark-watson/haskell_tutorial_cookbook_examples 7 | author: Mark Watson 8 | maintainer: mark@markwatson.com 9 | copyright: 2019 Mark Watson 10 | license: Apache-2.0 11 | license-file: ../APACHE_LICENSE-2.0.txt 12 | build-type: Simple 13 | extra-source-files: 14 | README.md 15 | 16 | source-repository head 17 | type: git 18 | location: https://github.com/githubuser/NlpTool 19 | 20 | library 21 | exposed-modules: 22 | Categorize 23 | Entities 24 | Sentence 25 | Summarize 26 | NlpUtils 27 | Stemmer 28 | BroadcastNetworkNamesDbPedia 29 | Category1Gram 30 | Category2Gram 31 | CityNamesDbpedia 32 | CompanyNamesDbpedia 33 | CountryNamesDbpedia 34 | PeopleDbPedia 35 | PoliticalPartyNamesDbPedia 36 | TradeUnionNamesDbPedia 37 | UniversityNamesDbPedia 38 | other-modules: 39 | Paths_NlpTool 40 | hs-source-dirs: 41 | src 42 | src/nlp 43 | src/nlp/data 44 | build-depends: 45 | base >=4.7 && <5, wreq, lens, bytestring 46 | , hxt, json, uri-encode >= 1.5.0.5, MissingH 47 | , aeson >= 1.4.2.0, containers, split, text 48 | , directory, yaml, aeson, string-conversions 49 | , MissingH, json >= 0.9.3 50 | default-language: Haskell2010 51 | 52 | executable NlpTool-exe 53 | main-is: NlpTool.hs 54 | other-modules: 55 | Paths_NlpTool 56 | hs-source-dirs: 57 | app 58 | ghc-options: -threaded -rtsopts -with-rtsopts=-N 59 | build-depends: 60 | NlpTool 61 | , base >=4.7 && <5 62 | , MissingH 63 | , directory, json >= 0.9.3 64 | default-language: Haskell2010 65 | -------------------------------------------------------------------------------- /NlpTool/README.md: -------------------------------------------------------------------------------- 1 | kbnlp.hs 2 | ======== 3 | 4 | Mark Watson's Haskell NLP Experiments 5 | 6 | Copyright 2014 by Mark Watson. All rights reserved. 7 | 8 | License: AGPL version 3 license. (Note: I own the copyright of all of the code in this project; contact me if you need a commercial license.) 9 | 10 | This project also includes some experiments using Haskell for SPARQL clients. 11 | 12 | I am currently working on using, when possible, DBPedia URIs as identifiers for entities detected in text. As is much of this project, this is a work in progress. 13 | 14 | 15 | ## Credits 16 | 17 | I use the Haskell stemmer written by Dmitry Antonyuk and the sentence splitting code written by Eric Kow. 18 | 19 | Thanks!! 20 | 21 | ## Generated code 22 | 23 | There are a fair number of Haskell "source" files that were generated by Ruby scripts that are not included in this project. 24 | 25 | These files (e.g., CityNamesDbpedia.hs, Category1Gram.hs, Category2Gram.hs, etc.) mostly use Data.Map.fromList to create in-memory maps for lingusitic and other data. These files are not particularly interesting. The more interesting code is found in the top level files Summarize.hs, Entities.hs, etc. 26 | 27 | ## Building the NlpTool-exe executable: 28 | 29 | stack build --fast --exec NlpTool-exe 30 | 31 | 32 | ## Running the NLP examples Interactively - with stack 33 | 34 | Install stack: https://github.com/commercialhaskell/stack/wiki/Downloads 35 | 36 | stack init 37 | 38 | stack build 39 | 40 | stack ghci 41 | 42 | stack exec NlpTool 43 | 44 | Inside stack ghci: 45 | 46 | stack ghci 47 | 48 | ~~~~~~~~ 49 | :l Categorize.hs 50 | main 51 | 52 | :l Summarize.hs 53 | main 54 | ~~~~~~~~ 55 | 56 | etc. 57 | -------------------------------------------------------------------------------- /NlpTool/app/NlpTool.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | -- Copyright 2014 by Mark Watson. All rights reserved. 4 | -- The software and data in this project can be used under the terms of the GPL version 3 license or APache 2 license. 5 | 6 | -- experiments with creating a command line tool for categorization and entity detection 7 | 8 | import System.IO 9 | import Text.JSON (showJSON, encode) 10 | 11 | import NlpUtils 12 | 13 | import Categorize 14 | import Entities 15 | import Summarize 16 | 17 | main = do 18 | putStrLn "Enter text (all on one line)" 19 | s <- getLine 20 | let cats = bestCategories (splitWords s); 21 | bestCat = if not (null cats) then fst (head cats) else ""; 22 | sum = summarizeS s; 23 | spwkc = splitWordsKeepCase s; 24 | people = encode $ showJSON $ peopleNames spwkc; 25 | countries = encode $ showJSON $ countryNames spwkc; 26 | companies = encode $ showJSON $ companyNames spwkc; 27 | result = encode $ showJSON [bestCat, sum]; 28 | result2 = encode $ showJSON [people, countries, companies] in 29 | do 30 | putStr "category:\t" 31 | putStrLn bestCat 32 | putStr "summary:\t" 33 | putStrLn sum 34 | putStr "people:\t" 35 | putStrLn people 36 | putStr "companies:\t" 37 | putStrLn companies 38 | putStr "countries:\t" 39 | putStrLn countries 40 | main 41 | -------------------------------------------------------------------------------- /NlpTool/src/nlp/Categorize.hs: -------------------------------------------------------------------------------- 1 | -- Copyright 2014-2016 by Mark Watson. All rights reserved. 2 | -- The software and data in this project can be used under the terms of the AGPL version 3. 3 | module Categorize 4 | ( bestCategories 5 | , splitWords 6 | , bigram 7 | ) where 8 | 9 | import Data.List (sortBy) 10 | import qualified Data.Map as M 11 | 12 | import Category1Gram (onegrams) 13 | import Category2Gram (twograms) 14 | 15 | import Sentence (segment) 16 | 17 | import Stemmer (stem) 18 | 19 | import NlpUtils (bigram, bigram_s, splitWords) 20 | 21 | catnames1 = map fst onegrams 22 | 23 | catnames2 = map fst twograms 24 | 25 | stemWordsInString :: String -> [Char] 26 | stemWordsInString s = init $ concatMap ((++ " ") . stem) (splitWords s) -- why discard last item in list? 27 | 28 | stemScoredWordList = map (\(str, score) -> (stemWordsInString str, score)) 29 | 30 | stemHelper = 31 | map 32 | (\(category, swl) -> 33 | (category, M.fromList (stemScoredWordList (M.toList swl)))) 34 | 35 | stem2 :: [([Char], M.Map [Char] Double)] 36 | stem2 = stemHelper twograms 37 | 38 | stem1 :: [([Char], M.Map [Char] Double)] 39 | stem1 = stemHelper onegrams 40 | 41 | scoreCat wrds amap = sum $ map (\x -> M.findWithDefault 0.0 x amap) wrds 42 | 43 | score wrds amap = 44 | filter (\(a, b) -> b > 0.9) $ 45 | zip [0 ..] $ map (\(s, m) -> scoreCat wrds m) amap 46 | 47 | cmpScore (a1, b1) (a2, b2) = compare b2 b1 48 | 49 | bestCategoriesHelper wrds ngramMap categoryNames = 50 | let tg = bigram_s wrds 51 | in map (\(a, b) -> (categoryNames !! a, b)) $ 52 | sortBy cmpScore $ score wrds ngramMap 53 | 54 | bestCategories1 wrds = take 3 $ bestCategoriesHelper wrds onegrams catnames1 55 | 56 | bestCategories2 :: [[Char]] -> [([Char], Double)] 57 | bestCategories2 wrds = 58 | take 3 $ bestCategoriesHelper (bigram_s wrds) twograms catnames2 59 | 60 | bestCategories1stem :: [[Char]] -> [([Char], Double)] 61 | bestCategories1stem wrds = take 3 $ bestCategoriesHelper wrds stem1 catnames1 62 | 63 | bestCategories2stem :: [[Char]] -> [([Char], Double)] 64 | bestCategories2stem wrds = 65 | take 3 $ bestCategoriesHelper (bigram_s wrds) stem2 catnames2 66 | 67 | bestCategories :: [String] -> [(String, Double)] 68 | bestCategories wrds = map do_normalization_to_probabilities non_normalized 69 | where 70 | sum1 = 71 | M.unionWith 72 | (+) 73 | (M.fromList $ bestCategories1 wrds) 74 | (M.fromList $ bestCategories2 wrds) 75 | sum2 = 76 | M.unionWith 77 | (+) 78 | (M.fromList $ bestCategories1stem wrds) 79 | (M.fromList $ bestCategories2stem wrds) 80 | non_normalized = sortBy cmpScore $ M.toList $ M.unionWith (+) sum1 sum2 81 | total_scores = foldl (+) 0 $ map snd non_normalized 82 | do_normalization_to_probabilities (name, value) = 83 | (name, value / total_scores) 84 | -------------------------------------------------------------------------------- /NlpTool/src/nlp/Entities.hs: -------------------------------------------------------------------------------- 1 | -- Copyright 2014 by Mark Watson. All rights reserved. The software and data in this project can be used under the terms of either the GPL version 3 license or the Apache 2 license. 2 | -- Identify entities (people, places, companies, etc.) in text and 3 | -- return entities and URIs to further information in DBPedia/WikiPedia 4 | module Entities 5 | ( companyNames 6 | , peopleNames 7 | , countryNames 8 | , cityNames 9 | , broadcastNetworkNames 10 | , politicalPartyNames 11 | , tradeUnionNames 12 | , universityNames 13 | ) where 14 | 15 | import Data.Char (toLower) 16 | import Data.List (intersect, intersperse, sort) 17 | import qualified Data.Map as M 18 | import Data.Maybe (isJust) 19 | import Data.Set (empty) 20 | import qualified Data.Set as S 21 | 22 | import NlpUtils 23 | ( bigram 24 | , bigram_s 25 | , removeDuplicates 26 | , splitWords 27 | , splitWordsKeepCase 28 | , trigram 29 | , trigram_s 30 | ) 31 | 32 | import CityNamesDbpedia (cityMap) 33 | import CompanyNamesDbpedia (companyMap) 34 | import CountryNamesDbpedia (countryMap) 35 | import PeopleDbPedia (peopleMap) 36 | 37 | import BroadcastNetworkNamesDbPedia (broadcastNetworkMap) 38 | import PoliticalPartyNamesDbPedia (politicalPartyMap) 39 | import TradeUnionNamesDbPedia (tradeUnionMap) 40 | import UniversityNamesDbPedia (universityMap) 41 | 42 | isSubsetOf :: (Foldable t1, Foldable t2, Eq a) => t1 a -> t2 a -> Bool 43 | xs `isSubsetOf` ys = all (`elem` ys) xs 44 | 45 | namesHelper ngrams dbPediaMap = 46 | filter 47 | (\x -> 48 | case x of 49 | (_, Just x) -> True 50 | _ -> False) $ 51 | map 52 | (\ngram -> 53 | ( ngram 54 | , let v = M.lookup ngram dbPediaMap 55 | in if isJust v 56 | then return (ngram, v) 57 | else Nothing)) 58 | ngrams 59 | 60 | helperNames1W = namesHelper 61 | 62 | helperNames2W wrds = namesHelper (bigram_s wrds) 63 | 64 | helperNames3W wrds = namesHelper (trigram_s wrds) 65 | 66 | entityHelper entityTypeMap wrds = 67 | let cns = 68 | removeDuplicates $ 69 | sort $ 70 | helperNames1W wrds entityTypeMap ++ 71 | helperNames2W wrds entityTypeMap ++ helperNames3W wrds entityTypeMap 72 | in map (\(s, Just (a, Just b)) -> (a, b)) cns 73 | 74 | companyNames wrds = entityHelper companyMap wrds 75 | 76 | countryNames wrds = entityHelper countryMap wrds 77 | 78 | peopleNames wrds = entityHelper peopleMap wrds 79 | 80 | cityNames wrds = entityHelper cityMap wrds 81 | 82 | broadcastNetworkNames wrds = entityHelper broadcastNetworkMap wrds 83 | 84 | politicalPartyNames wrds = entityHelper politicalPartyMap wrds 85 | 86 | tradeUnionNames wrds = entityHelper tradeUnionMap wrds 87 | 88 | universityNames wrds = entityHelper universityMap wrds 89 | 90 | main :: IO () 91 | main = do 92 | let s = 93 | "As read in the San Francisco Chronicle, the company is owned by John Smith, Bill Clinton, Betty Sanders, and Dr. Ben Jones. Ben Jones and Mr. John Smith are childhood friends who grew up in Brazil, Canada, Buenos Aires, and the British Virgin Islands. Apple Computer relased a new version of OS X yesterday. Brazil Brazil Brazil. John Smith bought stock in ConocoPhillips, Heinz, Hasbro, and General Motors, Fox Sports Radio. I listen to B J Cole. Awami National Party is a political party. ALAEA is a trade union. She went to Brandeis University." 94 | print $ peopleNames $ splitWordsKeepCase s 95 | print $ countryNames $ splitWordsKeepCase s 96 | print $ companyNames $ splitWordsKeepCase s 97 | print $ cityNames $ splitWordsKeepCase s 98 | print $ broadcastNetworkNames $ splitWordsKeepCase s 99 | print $ politicalPartyNames $ splitWordsKeepCase s 100 | print $ tradeUnionNames $ splitWordsKeepCase s 101 | print $ universityNames $ splitWordsKeepCase s 102 | -------------------------------------------------------------------------------- /NlpTool/src/nlp/NlpUtils.hs: -------------------------------------------------------------------------------- 1 | -- Copyright 2014 by Mark Watson. All rights reserved. The software and data in this project can be used under the terms of the GPL version 3 license. 2 | module NlpUtils 3 | ( splitWords 4 | , bigram 5 | , bigram_s 6 | , splitWordsKeepCase 7 | , trigram 8 | , trigram_s 9 | , removeDuplicates 10 | , cleanText 11 | ) where 12 | 13 | import Data.Char (toLower) 14 | import Data.Set (fromList, toList) 15 | import Data.String.Utils (replace) 16 | 17 | cleanText s = replace "“" "\"" $ replace "”" "\"" $ replace "’" "'" s 18 | 19 | splitWords :: String -> [String] 20 | splitWords = 21 | words . 22 | map 23 | (\c -> 24 | if c `elem` ".,;:!\n\t\"" 25 | then ' ' 26 | else toLower c) 27 | 28 | bigram :: [a] -> [[a]] 29 | bigram [] = [] 30 | bigram [_] = [] 31 | bigram xs = take 2 xs : bigram (tail xs) 32 | 33 | bigram_s xs = [(head a) ++ " " ++ (head a) | a <- bigram xs] 34 | 35 | splitWordsKeepCase :: String -> [String] 36 | splitWordsKeepCase = 37 | words . 38 | map 39 | (\c -> 40 | if c `elem` ".,;:!\n\t\"" 41 | then ' ' 42 | else c) 43 | 44 | trigram :: [a] -> [[a]] 45 | trigram [] = [] 46 | trigram [_] = [] 47 | trigram [_, _] = [] 48 | trigram xs = take 3 xs : trigram (tail xs) 49 | 50 | trigram_s xs = 51 | [(head a) ++ " " ++ (a !! 1) ++ " " ++ (a !! 2) | a <- trigram xs] 52 | 53 | removeDuplicates :: Ord a => [a] -> [a] 54 | removeDuplicates = toList . fromList 55 | -------------------------------------------------------------------------------- /NlpTool/src/nlp/Stemmer.hs: -------------------------------------------------------------------------------- 1 | module Stemmer 2 | ( stem 3 | ) where 4 | 5 | import Control.Monad 6 | import Data.List 7 | import Data.Maybe 8 | import System.IO 9 | 10 | -- written by Dmitry Antonyuk - believed to be in public domain 11 | isConsonant str i 12 | | c `elem` "aeiou" = False 13 | | c == 'y' = i == 0 || isVowel str (i - 1) 14 | | otherwise = True 15 | where 16 | c = str !! i 17 | 18 | isVowel = (not .) . isConsonant 19 | 20 | byIndex fun str = fun str [0 .. length str - 1] 21 | 22 | measure = 23 | length . 24 | filter not . init . (True :) . map head . group . byIndex (map . isConsonant) 25 | 26 | containsVowel = byIndex (any . isVowel) 27 | 28 | endsWithDouble = startsWithDouble . reverse 29 | where 30 | startsWithDouble l 31 | | length l < 2 = False 32 | | otherwise = 33 | let (x:y:_) = l 34 | in x == y && x `notElem` "aeiou" 35 | 36 | cvc word 37 | | length word < 3 = False 38 | | otherwise = 39 | isConsonant word lastIndex && 40 | isVowel word (lastIndex - 1) && 41 | isConsonant word (lastIndex - 2) && last word `notElem` "wxy" 42 | where 43 | lastIndex = length word - 1 44 | 45 | statefulReplace predicate str end replacement 46 | | end `isSuffixOf` str = Just replaced 47 | | otherwise = Nothing 48 | where 49 | part = take (length str - length end) str 50 | replaced 51 | | predicate part = Right (part ++ replacement) 52 | | otherwise = Left str 53 | 54 | replaceEnd predicate str end replacement = do 55 | result <- statefulReplace predicate str end replacement 56 | return (either id id result) 57 | 58 | findStem f word pairs = msum $ map (uncurry (replaceEnd f word)) pairs 59 | 60 | measureGT = flip ((>) . measure) 61 | 62 | step1a word = fromMaybe word result 63 | where 64 | result = 65 | findStem 66 | (const True) 67 | word 68 | [("sses", "ss"), ("ies", "i"), ("ss", "ss"), ("s", "")] 69 | 70 | beforeStep1b word = fromMaybe (Left word) result 71 | where 72 | cond23 x = do 73 | v <- x 74 | either (const Nothing) (return . Right) v 75 | cond1 x = do 76 | v <- x 77 | return (Left v) 78 | result = 79 | cond1 (replaceEnd (measureGT 0) word "eed" "ee") `mplus` 80 | cond23 (statefulReplace containsVowel word "ed" "") `mplus` 81 | cond23 (statefulReplace containsVowel word "ing" "") 82 | 83 | afterStep1b word = fromMaybe word result 84 | where 85 | double = 86 | endsWithDouble word && not (any ((`isSuffixOf` word) . return) "lsz") 87 | mEq1AndCvc = measure word == 1 && cvc word 88 | iif cond val = 89 | if cond 90 | then Just val 91 | else Nothing 92 | result = 93 | findStem (const True) word [("at", "ate"), ("bl", "ble"), ("iz", "ize")] `mplus` 94 | iif double (init word) `mplus` 95 | iif mEq1AndCvc (word ++ "e") 96 | 97 | step1b = either id afterStep1b . beforeStep1b 98 | 99 | step1c word = fromMaybe word result 100 | where 101 | result = replaceEnd containsVowel word "y" "i" 102 | 103 | step1 = step1c . step1b . step1a 104 | 105 | step2 word = fromMaybe word result 106 | where 107 | result = 108 | findStem 109 | (measureGT 0) 110 | word 111 | [ ("ational", "ate") 112 | , ("tional", "tion") 113 | , ("enci", "ence") 114 | , ("anci", "ance") 115 | , ("izer", "ize") 116 | , ("bli", "ble") 117 | , ("alli", "al") 118 | , ("entli", "ent") 119 | , ("eli", "e") 120 | , ("ousli", "ous") 121 | , ("ization", "ize") 122 | , ("ation", "ate") 123 | , ("ator", "ate") 124 | , ("alism", "al") 125 | , ("iveness", "ive") 126 | , ("fulness", "ful") 127 | , ("ousness", "ous") 128 | , ("aliti", "al") 129 | , ("iviti", "ive") 130 | , ("biliti", "ble") 131 | , ("logi", "log") 132 | ] 133 | 134 | step3 word = fromMaybe word result 135 | where 136 | result = 137 | findStem 138 | (measureGT 0) 139 | word 140 | [ ("icate", "ic") 141 | , ("ative", "") 142 | , ("alize", "al") 143 | , ("iciti", "ic") 144 | , ("ical", "ic") 145 | , ("ful", "") 146 | , ("ness", "") 147 | ] 148 | 149 | step4 word = fromMaybe word result 150 | where 151 | gt1andST str = (measureGT 1) str && any ((`isSuffixOf` str) . return) "st" 152 | findGT1 = findStem (measureGT 1) word . map (flip (,) "") 153 | result = 154 | (findGT1 155 | [ "al" 156 | , "ance" 157 | , "ence" 158 | , "er" 159 | , "ic" 160 | , "able" 161 | , "ible" 162 | , "ant" 163 | , "ement" 164 | , "ment" 165 | , "ent" 166 | ]) `mplus` 167 | (findStem gt1andST word [("ion", "")]) `mplus` 168 | (findGT1 ["ou", "ism", "ate", "iti", "ous", "ive", "ize"]) 169 | 170 | step5a word = fromMaybe word result 171 | where 172 | test str = (measureGT 1 str) || ((measure str == 1) && (not $ cvc str)) 173 | result = replaceEnd test word "e" "" 174 | 175 | step5b word = fromMaybe word result 176 | where 177 | cond s = last s == 'l' && measureGT 1 s 178 | result = replaceEnd cond word "l" "" 179 | 180 | step5 = step5b . step5a 181 | 182 | allSteps = step5 . step4 . step3 . step2 . step1 183 | 184 | stem s 185 | | length s < 3 = s 186 | | otherwise = allSteps s 187 | 188 | main :: IO () 189 | main 190 | --content <- readFile "input.txt" 191 | --writeFile "out.txt" $ unlines $ map stem $ lines content 192 | = do 193 | let content = 194 | "The sport of hocky is about 100 years old by ahdi dates. American Football is a newer sport. Programming is fun. Congress passed a new budget that might help the economy. The President signed the tax bill." 195 | in print $ map stem $ words content 196 | -------------------------------------------------------------------------------- /NlpTool/src/nlp/Summarize.hs: -------------------------------------------------------------------------------- 1 | -- Copyright 2014 by Mark Watson. All rights reserved. 2 | -- The software and data in this project can be used under the terms of the AGPL version 3 license or Apache 2 license. 3 | module Summarize 4 | ( summarize 5 | , summarizeS 6 | ) where 7 | 8 | import Categorize (bestCategories) 9 | import Data.List.Utils (replace) 10 | import qualified Data.Map as M 11 | import Data.Maybe (fromMaybe) 12 | import NlpUtils (bigram_s, cleanText, splitWords) 13 | import Sentence (segment) 14 | 15 | import Category1Gram (onegrams) 16 | import Category2Gram (twograms) 17 | 18 | scoreSentenceHelper words scoreMap -- just use 1grams for now 19 | = sum $ map (\word -> M.findWithDefault 0.0 word scoreMap) words 20 | 21 | safeLookup key alist = fromMaybe 0 $ lookup key alist 22 | 23 | scoreSentenceByBestCategories words catDataMaps bestCategories = 24 | map 25 | (\(category, aMap) -> 26 | ( category 27 | , safeLookup category bestCategories * scoreSentenceHelper words aMap)) 28 | catDataMaps 29 | 30 | scoreForSentence words catDataMaps bestCategories = 31 | sum $ map snd $ scoreSentenceByBestCategories words catDataMaps bestCategories 32 | 33 | summarize s = 34 | let words = splitWords $ cleanText s 35 | bestCats = bestCategories words 36 | sentences = segment s 37 | result1grams = 38 | map 39 | (\sentence -> 40 | ( sentence 41 | , scoreForSentence (splitWords sentence) onegrams bestCats)) 42 | sentences 43 | result2grams = 44 | map 45 | (\sentence -> 46 | ( sentence 47 | , scoreForSentence 48 | (bigram_s (splitWords sentence)) 49 | twograms 50 | bestCats)) 51 | sentences 52 | mergedResults = 53 | filter (\(s, v) -> v > 0.05) $ 54 | M.toList $ 55 | M.unionWith (+) (M.fromList result1grams) (M.fromList result1grams) 56 | c400 = filter (\(sentence, score) -> score > 10) mergedResults 57 | c300 = filter (\(sentence, score) -> score > 7) mergedResults 58 | c280 = filter (\(sentence, score) -> score > 4) mergedResults 59 | c250 = filter (\(sentence, score) -> score > 3) mergedResults 60 | c200 = filter (\(sentence, score) -> score > 2) mergedResults 61 | c100 = filter (\(sentence, score) -> score > 1) mergedResults 62 | c050 = filter (\(sentence, score) -> score > 0.5) mergedResults 63 | c000 = mergedResults 64 | in if (not (null c400)) && (length c400) > 1 && (length c400) < 3 65 | then c400 66 | else if (not (null c300)) && (length c300) > 1 && (length c300) < 3 67 | then c300 68 | else if not (null c280) && (length c280) > 1 && (length c280) < 3 69 | then c280 70 | else if not (null c250) && 71 | (length c250) > 1 && (length c250) < 3 72 | then c250 73 | else if not (null c200) && 74 | (length c200) > 1 && (length c200) < 3 75 | then c200 76 | else if not (null c100) && 77 | (length c100) > 1 && 78 | (length c100) < 3 79 | then c100 80 | else if not (null c050) && 81 | (length c050) > 1 && 82 | (length c050) < 3 83 | then c050 84 | else if (length mergedResults) < 85 | 3 86 | then c000 87 | else [ head 88 | mergedResults 89 | , (head . tail) 90 | mergedResults 91 | ] 92 | 93 | summarizeS s = 94 | let a = 95 | replace "\"" "'" $ 96 | replace "\n" " " $ concatMap (\x -> fst x ++ " ") $ summarize s 97 | in init $ 98 | if not (null a) 99 | then a 100 | else safeFirst $ segment s 101 | where 102 | safeFirst x 103 | | length x > 1 = head x ++ x !! 1 104 | | not (null x) = head x 105 | | otherwise = " " 106 | 107 | main = do 108 | let s = 109 | "Plunging European stocks, wobbly bonds and grave concerns about the health of Portuguese lender Banco Espirito Santo SA made last week feel like a rerun of the euro crisis, but most investors say it was no more than a blip for a resurgent region. Banco Espirito Santo has been in investors’ sights since December, when The Wall Street Journal first reported on accounting irregularities at the complex firm. Nerves frayed on Thursday when Banco Espirito Santo's parent company said it wouldn't be able to meet some short-term debt obligations." 110 | print $ summarize s 111 | print $ summarizeS s 112 | -------------------------------------------------------------------------------- /NlpTool/src/nlp/data/UniversityNamesDbPedia.hs: -------------------------------------------------------------------------------- 1 | -- Copyright 2014 by Mark Watson. All rights reserved. The software and data in this project can be used under the terms of the GPL version 3 license. 2 | 3 | module UniversityNamesDbPedia (universityMap) where 4 | 5 | import qualified Data.Map as M 6 | 7 | universityMap = M.fromList [("ISAE formation SUPAERO", "