├── .dockerignore ├── README.md ├── apis ├── AttentionAPI.lua ├── EntityLinkerAPI.lua ├── afterlife.png ├── init.lua └── install.sh ├── build.sh ├── datasets ├── Freebase │ └── Freebase_EL_API.lua ├── NLP │ ├── first_names.txt │ └── last_names.txt ├── SimpleQuestions │ ├── README.txt │ ├── annotated_fb_data_test.txt │ ├── annotated_fb_data_train.txt │ ├── annotated_fb_data_valid.txt │ ├── entity_vocab.txt │ ├── entity_vocab_char.txt │ ├── predicate_vocab.txt │ ├── predicate_vocab_char.txt │ ├── question_vocab.txt │ ├── question_vocab_char.txt │ └── test │ │ ├── all_entities.txt │ │ ├── all_predicates.txt │ │ ├── entities.txt │ │ ├── entity_vocab.txt │ │ ├── entity_vocab_char.txt │ │ ├── log_likelihoods.txt │ │ ├── object_names.txt │ │ ├── positive_entities.txt │ │ ├── positive_predicates.txt │ │ ├── predicate_vocab.txt │ │ ├── predicate_vocab_char.txt │ │ ├── predicates.txt │ │ ├── predictions_entities_1.txt │ │ ├── predictions_facts_1.txt │ │ ├── predictions_predicates_1.txt │ │ ├── queries.txt │ │ ├── question_vocab.txt │ │ ├── question_vocab_char.txt │ │ ├── questions.txt │ │ └── subject_names.txt ├── init.lua ├── scripts │ └── evaluate_sq_fb_dataset_v2.lua └── util │ ├── HashVocab.lua │ ├── SparseHashVocab.lua │ ├── Vocab.lua │ ├── math.lua │ ├── qa_processing_util.lua │ └── read_sq_data.lua ├── delete.sh ├── dmn ├── README.md ├── answer_module │ └── AnswerRerankModule.lua ├── dmn_network │ └── Attention_Network.lua ├── init.lua ├── input_module │ ├── hidden_layers │ │ ├── HiddenDummyLayer.lua │ │ ├── HiddenGRUProjLayer.lua │ │ ├── HiddenIdentityLayer.lua │ │ ├── HiddenLayer.lua │ │ └── HiddenProjLayer.lua │ └── input_layers │ │ ├── BOWLayer.lua │ │ ├── EmbedLayer.lua │ │ ├── FastHashLayer.lua │ │ ├── HashLayer.lua │ │ ├── InputLayer.lua │ │ └── SparseHashLayer.lua ├── models │ ├── Attention_LSTM_Decoder.lua │ ├── DSSM_Layer.lua │ ├── LSTM_Decoder.lua │ ├── RNN_Utils.lua │ ├── nn_units │ │ ├── CRowAddTable.lua │ │ ├── CRowMulTable.lua │ │ ├── JoinTable.lua │ │ 
├── Linear.lua │ │ ├── Optim.lua │ │ ├── PaddedJoinTable.lua │ │ ├── Padding.lua │ │ ├── SmoothCosineSimilarity.lua │ │ ├── SpatialCrossLRN.lua │ │ └── Squeeze.lua │ └── rnn_units │ │ └── units.lua ├── question_module │ └── QuestionModule.lua ├── scripts │ ├── create_vocab.py │ ├── setup.sh │ └── untitled ├── semantic_memory_module │ └── WordEmbedModule.lua ├── test_scripts.sh ├── tests │ ├── NCE_test.lua │ ├── attention_test.lua │ ├── batch_answer_module_test.lua │ ├── batch_captioner_test.lua │ ├── batch_gf_test.lua │ ├── batch_gradient_mask_test.lua │ ├── batch_gru_test.lua │ ├── batch_lstm_equivalence_test.lua │ ├── batch_lstm_network_test.lua │ ├── batch_lstm_test.lua │ ├── batch_softmax_test.lua │ ├── bleu_test.lua │ ├── coarse_attention_test.lua │ ├── context_test.lua │ ├── dataset_test.lua │ ├── dmn_predict_test.lua │ ├── dmn_unit_test.lua │ ├── equality_test.lua │ ├── gf_rnn_test.lua │ ├── grad_test.lua │ ├── grayscale.jpg │ ├── gru_test.lua │ ├── image_loader_test.lua │ ├── image_statistics_test.lua │ ├── kl_div_test.lua │ ├── load_dssm_test.lua │ ├── load_qa_test.lua │ ├── mem_network.lua │ ├── padding_test.lua │ ├── parallelApply.lua │ ├── probability_interpolation_test.lua │ ├── qa_attention_test.lua │ ├── read_from_shell_test.lua │ ├── substring_test.lua │ ├── temporal_convolution_test.lua │ ├── tensor_partition_test.lua │ ├── vision_test.lua │ └── vocab_hash_test.lua ├── trained_models │ └── Attention_Network_char_level_true_attention_type_coarse_fixed_35.th └── util │ ├── constants.lua │ ├── eval_functions.lua │ ├── functions.lua │ ├── io_functions.lua │ ├── logger.lua │ ├── math_functions.lua │ └── print_logger.lua ├── license.txt ├── opensource └── json.lua └── servers └── FreebaseWebServer ├── README.md ├── docker-compose.yml ├── requirements.txt ├── src ├── .DS_Store~668f33dd2e7a36f45dece9e86a3a8ea0fe95588f ├── .DS_Store~937255f17dd28d0aeb532c7c6b9a28604c71306c ├── .DS_Store~HEAD ├── .DS_Store~HEAD_0 ├── client │ ├── __init__.py │ └── 
superhero.py ├── config.py ├── manage.py ├── model │ ├── __init__.py │ ├── abc.py │ └── user.py ├── resource │ ├── __init__.py │ ├── freebase.py │ └── user.py ├── route │ ├── __init__.py │ ├── common.py │ ├── freebase.py │ └── user.py ├── server.py └── util │ ├── __init__.py │ ├── freebase.py │ ├── freebase_helper.py │ ├── parse_params.py │ └── tokenizer.py └── test ├── __init__.py └── route ├── __init__.py └── test_user.py /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidgolub/SimpleQA/f072f87185c22a815c5abef3d0ff28eeac129119/.dockerignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This contains the code corresponding to the publication Character-Level Question-Answering with Attention found at: 2 | https://arxiv.org/abs/1604.00727 3 | 4 | To run the evaluation script first build the container: 5 | # Pull the container 6 | docker pull davidgolub/simpleqa 7 | 8 | docker run -it davidgolub/simpleqa /bin/bash 9 | 10 | # Then inside the container start the freebase server 11 | cd /src/servers/FreebaseWebServer 12 | 13 | pip install -r requirements.txt 14 | 15 | cd src 16 | 17 | python server.py & disown 18 | 19 | # Run the evaluation script 20 | cd /src/datasets 21 | 22 | th scripts/evaluate_sq_fb_dataset_v2.lua 23 | 24 | We hope this code helps researchers explore character-level encoder-decoder frameworks to semantic parsing with KBs and many other tasks in NLP! Email golubd [at] stanford.edu if you have any questions. 
25 | 26 | -------------------------------------------------------------------------------- /apis/afterlife.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidgolub/SimpleQA/f072f87185c22a815c5abef3d0ff28eeac129119/apis/afterlife.png -------------------------------------------------------------------------------- /apis/init.lua: -------------------------------------------------------------------------------- 1 | softmax = {} 2 | 3 | http = require("socket.http") 4 | 5 | -- ML APIs 6 | include('AttentionAPI.lua') 7 | 8 | -- Entity Linkers 9 | include('EntityLinkerAPI.lua') 10 | 11 | 12 | -- ml apis 13 | softmax.qa_api = softmax.AttentionAPI{} 14 | 15 | -- entity linking api 16 | softmax.entity_linker_api = softmax.EntityLinkerAPI{} 17 | 18 | 19 | -- profilers 20 | -- softmax.profiler = require('../apis/profilers/ProFi.lua') 21 | 22 | 23 | --local tmp_path = '/Users/David/Desktop/test.%s' 24 | --local image_url = 'https://static.elie.net/image/blog/2011/07/301-redirect1.png' 25 | --local im_path = softmax.image_api:download(image_url, tmp_path) 26 | --local cur_img = image.load(im_path) 27 | 28 | print("Done loading image captioning modules") -------------------------------------------------------------------------------- /apis/install.sh: -------------------------------------------------------------------------------- 1 | curl -s https://raw.githubusercontent.com/torch/ezinstall/master/install-all | bash 2 | luarocks install json 3 | luarocks install nn 4 | luarocks install nngraph 5 | luarocks install json 6 | luarocks install luasocket 7 | luarocks install lua-cjson 8 | luarocks install luasec 9 | luarocks install utf8 10 | luarocks install threads 11 | luarocks install argcheck 12 | luarocks install xavante 13 | luarocks install wsapi-xavante 14 | luarocks install cgilua 15 | luarocks install sailor 16 | 17 | pip install tinys3 18 | 19 | # Core utils for mac 20 | brew install coreutils 
findutils gnu-tar gnu-sed gawk gnutls gnu-indent gnu-getopt 21 | 22 | 23 | git filter-branch --prune-empty -d ~/Desktop/scratch \ 24 | --index-filter "git rm --cached -f --ignore-unmatch TorchWebServer/cpu/softmax/models/cm_captioning_5.th" \ 25 | --tag-name-filter cat -- --all 26 | 27 | # Processes: 28 | 14086 -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker pull davidgolub/simpleqa . 3 | docker run -it simpleqa /bin/bash 4 | 5 | # Then inside the container start the freebase server 6 | cd /src/servers/FreebaseWebServer 7 | pip install -r requirements.txt 8 | cd src 9 | python server.py & disown 10 | 11 | # Run the evaluation script 12 | cd /src/datasets 13 | th scripts/evaluate_sq_fb_dataset_v2.lua -------------------------------------------------------------------------------- /datasets/Freebase/Freebase_EL_API.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | FreebaseAPI: Get candidate entities and facts from freebase using elasticsearch 4 | 5 | --]] 6 | 7 | local FreebaseAPI = torch.class('datasets.Freebase_EL_API') 8 | 9 | function FreebaseAPI:__init(config) 10 | self.base_url = 'http://localhost:5000' 11 | 12 | self.entity_freebase_endpoint = self.base_url .. "/api/v1/freebase/name?query=%s&num_results=%d&remove_stopwords=True"--filter=suggest&key=%s" 13 | self.name_freebase_endpoint = self.base_url .. "/api/v1/freebase/name?query=%s&num_results=%d&remove_stopwords=True" 14 | self.topic_freebase_endpoint = self.base_url .. 
"/api/v1/freebase/fact?topic_ids=%s&num_results=%d&remove_stopwords=True" 15 | 16 | self.num_calls = 0 17 | end 18 | 19 | -- Keep track of number of calls 20 | function FreebaseAPI:increment_num_calls() 21 | self.num_calls = self.num_calls + 1 22 | end 23 | 24 | -- Returns freebase ids of all entities that match name 25 | function FreebaseAPI:entities(query, num_results) 26 | assert(query ~= nil, "topic id must not be null") 27 | assert(num_results ~= nil, "Number of results must not be null") 28 | 29 | self:increment_num_calls() 30 | 31 | local encoded_query = dmn.io_functions.url_encode(query) 32 | 33 | -- get features from url 34 | local extract_url = string.format(self.entity_freebase_endpoint, 35 | encoded_query, 36 | num_results) 37 | --dmn.logger:print("Extracting from url " .. extract_url) 38 | 39 | local num_tries = 0 40 | 41 | -- extract topics 42 | local html = dmn.io_functions.http_request(extract_url) 43 | local json_vals 44 | 45 | -- Hacky error handling for image load issues 46 | dmn.io_functions.trycatch( 47 | function() 48 | json_vals = dmn.io_functions.json_decode(html) 49 | end, 50 | function(err) 51 | --dmn.logger:print("ERROR OCCURED LOADING IMAGE") 52 | print("ERROR DECODING JSON " .. err .. tostring(html)) 53 | json_vals = nil 54 | end) 55 | 56 | if json_vals == nil or json_vals["result"] == nil then 57 | name = "ERROR OCCURED" 58 | dmn.logger:print("ERROR REQUESTING") 59 | dmn.logger:print("ERROR HTTPS REQUESTING " .. html) 60 | end 61 | 62 | while (json_vals == nil or json_vals["result"] == nil) and (num_tries < dmn.constants.NUM_RETRIES) do 63 | -- try again 64 | local msg = "Error requesting url " .. extract_url .. " retrying again" .. 
num_tries 65 | dmn.logger:print(msg) 66 | 67 | 68 | num_tries = num_tries + 1 69 | -- request url again 70 | html = dmn.io_functions.http_request(extract_url) 71 | -- Hacky error handling for image load issues 72 | dmn.io_functions.trycatch( 73 | function() 74 | json_vals = dmn.io_functions.json_decode(html)--lua_json.parse(html) 75 | end, 76 | function(err) 77 | --dmn.logger:print("ERROR OCCURED LOADING IMAGE") 78 | softmax.run_api:add_error_log("ERROR DECODING JSON " .. err .. tostring(html)) 79 | json_vals = nil 80 | end) 81 | end 82 | 83 | local names = json_vals["result"] 84 | local ids = {} 85 | local entity_names = {} 86 | local entity_types = {} 87 | 88 | for i = 1, #names do 89 | local cur_entity = names[i] 90 | local cur_id = cur_entity["freebase_id"] 91 | local cur_name = cur_entity["freebase_name"] 92 | local cur_types = {"TYPES_NOT_SUPPORTED"} 93 | 94 | table.insert(entity_names, cur_name) 95 | table.insert(ids, cur_id) 96 | table.insert(entity_types, cur_types) 97 | end 98 | 99 | return ids, entity_names, entity_types 100 | end 101 | 102 | -- Gets facts about a topic id 103 | -- image_url: URL to extract images from, must be not null 104 | -- returns: Torch double tensor of size 1024 with googlenet features from image url 105 | function FreebaseAPI:facts(topic_ids, num_results) 106 | assert(topic_ids ~= nil, "topic ids must not be null") 107 | assert(num_results ~= nil, "Number of results must not be null") 108 | 109 | self:increment_num_calls() 110 | 111 | csv_topic_ids = table.concat(topic_ids, ',') 112 | -- get features from url 113 | local extract_url = string.format(self.topic_freebase_endpoint, 114 | csv_topic_ids, 115 | num_results) 116 | 117 | 118 | names = {} 119 | topics = {} 120 | ids = {} 121 | facts = {} 122 | -- Hacky error handling for web request issues 123 | dmn.io_functions.trycatch( 124 | function() 125 | 126 | -- extract topics 127 | local html = dmn.io_functions.http_request(extract_url) 128 | local json_vals = 
dmn.io_functions.json_decode(html) 129 | local topics_desc = json_vals["result"] 130 | 131 | -- insert them 132 | for k,v in pairs(topics_desc) do 133 | if not dmn.functions.string_starts(k, "/type/object") then 134 | local id = v['src_freebase_id'] 135 | local name = v['src_freebase_name'] 136 | local topic = v['pred_freebase_name'] 137 | local tgt_freebase_id = v['tgt_freebase_id'] 138 | table.insert(names, name) 139 | table.insert(ids, id) 140 | table.insert(topics, topic) 141 | facts[name .. " " .. topic] = tgt_freebase_id 142 | end 143 | end 144 | end, 145 | 146 | function(err) 147 | dmn.logger:print("ERROR OCCURED requesting url " .. extract_url .. " " .. err) 148 | 149 | topics = {"ERROR OCCURED " .. topic_id} 150 | end) 151 | return topics, names, ids, facts 152 | end 153 | -------------------------------------------------------------------------------- /datasets/SimpleQuestions/README.txt: -------------------------------------------------------------------------------- 1 | The SimpleQuestions Dataset 2 | -------------------------------------------------------- 3 | In this directory is the SimpleQuestions dataset collected for 4 | research in automatic question answering. 5 | 6 | ** DATA ** 7 | SimpleQuestions is a dataset for simple QA, which consists 8 | of a total of 108,442 questions written in natural language by human 9 | English-speaking annotators each paired with a corresponding fact, 10 | formatted as (subject, relationship, object), that provides the answer 11 | but also a complete explanation. Fast have been extracted from the 12 | Knowledge Base Freebase (freebase.com). We randomly shuffle these 13 | questions and use 70\% of them (75910) as training set, 10\% as 14 | validation set (10845), and the remaining 20\% as test set. 15 | 16 | ** FORMAT ** 17 | Data is organized in 3 files: annotated_fb_data_{train, valid, test}.txt . 
18 | Each file contains one example per line with the following format: 19 | "Subject-entity [tab] relationship [tab] Object-entity [tab] question", 20 | with Subject-entity, relationship and Object-entity being www links 21 | pointing to the actual Freebase entities. 22 | 23 | ** DATA COLLECTION** 24 | We collected SimpleQuestions in two phases. The first phase consisted 25 | of shortlisting the set of facts from Freebase to be annotated with 26 | questions. We used Freebase as background KB and removed all facts 27 | with undefined relationship type i.e. containing the word 28 | "freebase". We also removed all facts for which the (subject, 29 | relationship) pair had more than a threshold number of objects. This 30 | filtering step is crucial to remove facts which would result in 31 | trivial uninformative questions, such as, "Name a person who is an 32 | actor?". The threshold was set to 10. 33 | 34 | In the second phase, these selected facts were sampled and delivered 35 | to human annotators to generate questions from them. For the sampling, 36 | each fact was associated with a probability which defined as a 37 | function of its relationship frequency in the KB: to favor 38 | variability, facts with relationship appearing more 39 | frequently were given lower probabilities. For each sampled facts, 40 | annotators were shown the facts along with hyperlinks to 41 | www.freebase.com to provide some context while framing the 42 | question. Given this information, annotators were asked to phrase a 43 | question involving the subject and the relationship 44 | of the fact, with the answer being the object. The 45 | annotators were explicitly instructed to phrase the question 46 | differently as much as possible, if they encounter multiple facts with 47 | similar relationship. They were also given the option of 48 | skipping facts if they wish to do so. 
This was very important to 49 | avoid the annotators to write a boiler plate questions when they had 50 | no background knowledge about some facts. 51 | 52 | -------------------------------------------------------------------------------- /datasets/SimpleQuestions/entity_vocab_char.txt: -------------------------------------------------------------------------------- 1 | E 2 | C 3 | a 4 | r 5 | d 6 | i 7 | c 8 | 9 | A 10 | e 11 | s 12 | t 13 | T 14 | h 15 | D 16 | b 17 | N 18 | o 19 | u 20 | U 21 | m 22 | v 23 | - 24 | O 25 | l 26 | n 27 | M 28 | f 29 | S 30 | W 31 | B 32 | . 33 | G 34 | R 35 | Y 36 | g 37 | H 38 | k 39 | w 40 | y 41 | P 42 | p 43 | ! 44 | I 45 | 5 46 | 8 47 | 0 48 | 3 49 | J 50 | j 51 | K 52 | L 53 | ' 54 | F 55 | V 56 | 1 57 | 9 58 | Z 59 | 2 60 | x 61 | ( 62 | ) 63 | , 64 | : 65 | z 66 | 7 67 | / 68 | ? 69 | 6 70 | q 71 | X 72 | ; 73 | 4 74 | Q 75 | " 76 | # 77 | [ 78 | ] 79 | $ 80 | * 81 | + 82 | % 83 | _ 84 | { 85 | } 86 | ` 87 | ~ 88 | @ 89 | = -------------------------------------------------------------------------------- /datasets/SimpleQuestions/predicate_vocab_char.txt: -------------------------------------------------------------------------------- 1 | / 2 | b 3 | o 4 | k 5 | w 6 | r 7 | i 8 | t 9 | e 10 | n 11 | _ 12 | s 13 | u 14 | j 15 | c 16 | m 17 | l 18 | a 19 | f 20 | y 21 | p 22 | d 23 | g 24 | v 25 | h 26 | x 27 | z 28 | q 29 | 1 30 | 2 31 | 3 32 | 8 33 | 9 34 | 0 35 | C -------------------------------------------------------------------------------- /datasets/SimpleQuestions/question_vocab_char.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidgolub/SimpleQA/f072f87185c22a815c5abef3d0ff28eeac129119/datasets/SimpleQuestions/question_vocab_char.txt -------------------------------------------------------------------------------- /datasets/SimpleQuestions/test/entity_vocab_char.txt: 
-------------------------------------------------------------------------------- 1 | H 2 | a 3 | r 4 | d 5 | e 6 | . 7 | F 8 | s 9 | t 10 | l 11 | A 12 | x 13 | 14 | G 15 | o 16 | f 17 | i 18 | P 19 | h 20 | y 21 | R 22 | g 23 | M 24 | q 25 | u 26 | Y 27 | v 28 | K 29 | n 30 | D 31 | b 32 | c 33 | L 34 | V 35 | ( 36 | m 37 | ) 38 | k 39 | T 40 | j 41 | J 42 | C 43 | 0 44 | 3 45 | z 46 | E 47 | Z 48 | S 49 | W 50 | w 51 | O 52 | p 53 | B 54 | 2 55 | 6 56 | ; 57 | I 58 | N 59 | 9 60 | ' 61 | , 62 | 1 63 | : 64 | U 65 | 4 66 | Q 67 | * 68 | 7 69 | 5 70 | 8 71 | - 72 | / 73 | = 74 | # 75 | + 76 | " 77 | X 78 | ! 79 | ? 80 | $ 81 | % 82 | [ 83 | ] -------------------------------------------------------------------------------- /datasets/SimpleQuestions/test/predicate_vocab_char.txt: -------------------------------------------------------------------------------- 1 | / 2 | m 3 | u 4 | s 5 | i 6 | c 7 | a 8 | l 9 | b 10 | g 11 | e 12 | n 13 | r 14 | _ 15 | t 16 | y 17 | p 18 | o 19 | f 20 | h 21 | w 22 | d 23 | v 24 | k 25 | j 26 | x 27 | z 28 | q 29 | 9 30 | 1 31 | 0 32 | 2 33 | 3 -------------------------------------------------------------------------------- /datasets/SimpleQuestions/test/predictions_entities_1.txt: -------------------------------------------------------------------------------- 1 | Harder.....Faster 2 | -------------------------------------------------------------------------------- /datasets/SimpleQuestions/test/predictions_facts_1.txt: -------------------------------------------------------------------------------- 1 | /m/06by7 /m/01qzt1 /m/0xhtw 2 | -------------------------------------------------------------------------------- /datasets/SimpleQuestions/test/predictions_predicates_1.txt: -------------------------------------------------------------------------------- 1 | /music/album/genre 2 | -------------------------------------------------------------------------------- /datasets/SimpleQuestions/test/question_vocab_char.txt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidgolub/SimpleQA/f072f87185c22a815c5abef3d0ff28eeac129119/datasets/SimpleQuestions/test/question_vocab_char.txt -------------------------------------------------------------------------------- /datasets/init.lua: -------------------------------------------------------------------------------- 1 | datasets = {} 2 | http = require("socket.http") 3 | https = require 'ssl.https' 4 | utf8 = require('utf8') 5 | require('json') 6 | require('lfs') 7 | cjson = require('cjson') 8 | 9 | -- Math helper functions 10 | include('../datasets/util/math.lua') 11 | 12 | -- For reading word embeddings, image features, and captions 13 | include('../datasets/util/Vocab.lua') 14 | include('../datasets/util/HashVocab.lua') 15 | include('../datasets/util/SparseHashVocab.lua') 16 | 17 | 18 | -- For reading simple questions data 19 | include('../datasets/util/read_sq_data.lua') 20 | 21 | -- For processing the qa data/generating vocab 22 | include('../datasets/util/qa_processing_util.lua') 23 | 24 | include('../datasets/Freebase/Freebase_EL_API.lua') 25 | 26 | datasets.freebase_api = datasets.Freebase_EL_API() 27 | 28 | -------------------------------------------------------------------------------- /datasets/scripts/evaluate_sq_fb_dataset_v2.lua: -------------------------------------------------------------------------------- 1 | require('../../dmn') 2 | 3 | cmd = torch.CmdLine() 4 | cmd:option('-start_index', 1, 'start index') 5 | cmd:option('-beam_size', 1000, 'beam size') 6 | cmd:option('-num_results', 1000, 'num results') 7 | cmd:option('-min_ngrams', 5, 'min ngrams') 8 | cmd:option('-max_ngrams', 8, 'max ngrams') 9 | cmd:option('-freebase_type', "Freebase_2M", "Freebase 2M") 10 | cmd:text() 11 | 12 | -- parse input params 13 | params = cmd:parse(arg) 14 | local min_ngrams = params.min_ngrams 15 | local max_ngrams = params.max_ngrams 16 | local num_results = 
params.num_results 17 | local beam_size = params.beam_size 18 | local freebase_type = params.freebase_type 19 | local rerank = false 20 | 21 | -- Create a new job for these items--log the stuff 22 | print("SimpleQuestions evaluation " .. freebase_type .. " " .. num_results) 23 | -- model we use for testing 24 | local model_paths = { 25 | --"../dmn/trained_models/Attention_Network_char_level_true_attention_type_coarse_23.th", 26 | "../dmn/trained_models/Attention_Network_char_level_true_attention_type_coarse_fixed_35.th", 27 | --"../dmn/trained_models/2531_Attention_Network_char_level_false_attention_type_coarse_fixed_9.th" 28 | --"../dmn/trained_models/Attention_Network_char_level_true_attention_type_coarse_25.th", 29 | --"../dmn/trained_models/Attention_Network_char_level_false_attention_type_coarse_17.th" 30 | --"../dmn/trained_models/Attention_Network_char_level_true_attention_type_coarse_fixed_20.th", 31 | --"../dmn/trained_models/Attention_Network_char_level_true_attention_type_coarse_fixed_26.th", 32 | --"../dmn/trained_models/Attention_Network_char_level_true_attention_type_coarse_fixed_27.th", 33 | --"../dmn/trained_models/Attention_Network_char_level_true_attention_type_coarse_21.th", 34 | --"../dmn/trained_models/Attention_Network_char_level_true_attention_type_coarse_19.th", 35 | --"../dmn/trained_models/Attention_Network_char_level_true_attention_type_fine_19.th", 36 | --"../dmn/trained_models/Attention_Network_char_level_true_attention_type_fine_17.th", 37 | } 38 | 39 | local stringed_models = table.concat(model_paths, " ") 40 | 41 | --"../dmn/trained_models/Attention_Network_char_level_false_attention_type_coarse_17.th", 42 | --"../dmn/trained_models/Attention_Network_char_level_true_attention_type_dropout_13.th"} 43 | 44 | for i = 1, #model_paths do 45 | local model_path = model_paths[i] 46 | softmax.qa_api:load_model_from_path(model_path) 47 | end 48 | 49 | local questions = datasets.read_line_data("../datasets/SimpleQuestions/test/questions.txt") 
50 | local predicates = datasets.read_line_data("../datasets/SimpleQuestions/test/positive_predicates.txt") 51 | local entities = datasets.read_line_data("../datasets/SimpleQuestions/test/positive_entities.txt") 52 | 53 | local entity_ids = datasets.read_tabbed_data("../datasets/SimpleQuestions/annotated_fb_data_test.txt") 54 | local facts = datasets.read_line_data("../datasets/SimpleQuestions/test/object_names.txt") 55 | 56 | local total_seen = 0 57 | local total_corr = 0 58 | local total_pred_corr = 0 59 | local total_fact_corr = 0 60 | 61 | local predicate_corr = 0 62 | local entity_corr = 0 63 | 64 | predicates_path = '../datasets/SimpleQuestions/test/predictions_predicates_' .. params.start_index 65 | .. '.txt' 66 | 67 | entities_path = '../datasets/SimpleQuestions/test/predictions_entities_' .. params.start_index 68 | .. '.txt' 69 | 70 | facts_path = '../datasets/SimpleQuestions/test/predictions_facts_' .. params.start_index 71 | .. '.txt' 72 | 73 | results_path = '../datasets/SimpleQuestions/test/results.txt' 74 | 75 | -- test predictions 76 | local predicates_file = 77 | io.open(predicates_path, 'w') 78 | 79 | local entities_file = 80 | io.open(entities_path, 'w') 81 | 82 | local facts_file = 83 | io.open(facts_path, 'w') 84 | 85 | local results_file = 86 | io.open(results_path, 'w') 87 | 88 | for j = params.start_index, #questions do 89 | dmn.logger:print("On index " .. 
j) 90 | local question = dmn.functions.string_trim(questions[j]) 91 | 92 | --assert(false) 93 | local best_predicates, best_entities, best_ids, best_facts, fact_mappings, likelihoods, 94 | candidate_predicates, candidate_entities = 95 | softmax.qa_api:answer_v2(question, min_ngrams, max_ngrams, num_results, beam_size, rerank) 96 | 97 | local best_entity 98 | local best_predicate 99 | local best_id 100 | local best_fact = nil 101 | 102 | local cur_index = 1 103 | local best_entity = best_entities[1] 104 | 105 | while best_fact == nil or best_fact == "NO FACT" 106 | and cur_index <= beam_size do 107 | best_id = best_ids[cur_index] 108 | best_entity = best_entities[cur_index] 109 | best_predicate = best_predicates[cur_index] 110 | best_fact = best_facts[cur_index] 111 | print(best_entity .. " " .. best_predicate .. " " .. best_fact) 112 | cur_index = cur_index + 1 113 | end 114 | 115 | if cur_index > beam_size then 116 | dmn.logger:print("Could not find valid fact!") 117 | best_id = (best_ids[cur_index] == nil) and "NO_ID" or best_ids[cur_index] 118 | best_predicate = best_predicates[1] 119 | best_entity = best_entities[1] 120 | best_fact = best_facts[1] 121 | end 122 | 123 | print(best_entity .. " " .. best_predicate .. " " .. 
best_fact) 124 | 125 | if best_predicate == predicates[j] then 126 | predicate_corr = predicate_corr + 1 127 | end 128 | 129 | if best_entity == entities[j] then 130 | entity_corr = entity_corr + 1 131 | end 132 | 133 | local ids_match = best_id == actual_id 134 | local stripped_name = dmn.functions.strip_accents(entities[j]) 135 | 136 | local parse_matches = string.lower(best_predicate) == string.lower(predicates[j]) and 137 | string.lower(best_entity) == string.lower(stripped_name) 138 | 139 | if parse_matches then 140 | total_corr = total_corr + 1 141 | if facts_match then 142 | dmn.logger:print("Facts match ") 143 | total_fact_corr = total_fact_corr + 1 144 | end 145 | end 146 | 147 | total_seen = total_seen + 1 148 | 149 | local index = math.floor(datasets.freebase_api.num_calls / 90000 + 1) 150 | local num_calls = datasets.freebase_api.num_calls 151 | 152 | local msg = "Index Tot/Fact/Names/ pred/entity accuracy " .. j .. " " .. total_corr / total_seen .. " " 153 | .. total_fact_corr / total_seen .. " " .. total_pred_corr / total_seen 154 | .. " " .. predicate_corr / total_seen .. " " .. entity_corr / total_seen .. " " 155 | .. question .. " " .. best_predicate .. " " .. best_entity 156 | .. " FACT:" .. best_fact .. " " .. num_calls .. " " .. best_id 157 | 158 | dmn.logger:print(msg) 159 | 160 | results_file:write(msg .. "\n") 161 | entities_file:write(best_entity .. "\n") 162 | predicates_file:write(best_predicate .. "\n") 163 | facts_file:write(best_fact .. "\n") 164 | 165 | entities_file:flush() 166 | predicates_file:flush() 167 | facts_file:flush() 168 | results_file:flush() 169 | end 170 | 171 | entities_file:close() 172 | predicates_file:close() 173 | facts_file:close() 174 | results_file:close() 175 | -------------------------------------------------------------------------------- /datasets/util/HashVocab.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | A HashVocabulary object. 
Initialized from a file with one HashVocabulary token per line.
Maps between HashVocabulary tokens and indices. If an UNK token is defined in the
HashVocabulary, returns the index to this token if queried for an out-of-HashVocabulary
token.

--]]

local HashVocab = torch.class('datasets.HashVocab')

-- Builds the trigram vocabulary from the tokens listed (one per line) in `path`.
-- add_unk: whether to register the special start/pad/end/unk tokens afterwards.
function HashVocab:__init(path, add_unk)
  assert(add_unk ~= nil, "Must determine whether to add unknown token or not")
  print("Loading HashVocabulary from path " .. path)

  self.hashed = true
  self.size = 0
  self._index = {}
  self._tokens = {}

  -- Include special start symbol and end symbol
  local file = io.open(path)
  if file == nil then error("Error opening file " .. path .. "\n") end

  -- Gets all tokens from line via hashing and adds
  local function add_line(line)
    local hashed_items = self:hash(line)
    for i = 1, #hashed_items do
      self:add(hashed_items[i])
    end
  end

  local num_lines = 0
  while true do
    local line = file:read()
    if line == nil then break end
    -- count only real lines (was incremented before the nil check, over-counting by one)
    num_lines = num_lines + 1
    add_line(line)
  end
  file:close()

  -- Detect special tokens that already exist in the vocabulary file.
  local unks = {'', '', 'UUUNKKK'}
  for _, tok in pairs(unks) do
    self.unk_index = self.unk_index or self._index[tok]
    if self.unk_index ~= nil then
      self.unk_token = tok
      break
    end
  end

  local starts = {'', ''}
  for _, tok in pairs(starts) do
    self.start_index = self.start_index or self._index[tok]
    if self.start_index ~= nil then
      self.start_token = tok
      break
    end
  end

  local ends = {'', ''}
  for _, tok in pairs(ends) do
    self.end_index = self.end_index or self._index[tok]
    if self.end_index ~= nil then
      self.end_token = tok
      break
    end
  end

  if add_unk then
    self:add_start_token()
    self:add_pad_token()
    self:add_end_token()
    self:add_unk_token()
  end

  print("Loaded all the HashVocabulary from " .. path .. " size is " .. self.size)
end

-- Hashes a word into its list of character trigrams over "#word#".
function HashVocab:hash(w)
  -- previously leaked `hashed_items` and `padded_word` as globals
  local hashed_items = {}
  local padded_word = "#" .. w .. "#"
  for i = 1, #padded_word - 2 do
    table.insert(hashed_items, padded_word:sub(i, i + 2))
  end
  return hashed_items
end

-- Returns true iff trigram w is already in the vocabulary.
function HashVocab:contains(w)
  return self._index[w] ~= nil
end

-- Adds trigram w, returning its (possibly pre-existing) index.
function HashVocab:add(w)
  if self._index[w] ~= nil then
    return self._index[w]
  end
  self.size = self.size + 1
  self._tokens[self.size] = w
  self._index[w] = self.size
  return self.size
end

-- Returns a dense trigram-count vector for `word` (bag of hashed trigrams).
-- gpu_mode is currently unused; the result is always a DoubleTensor.
function HashVocab:index(word, gpu_mode)
  assert(word ~= nil, "Must specify word to index")
  local tensor = torch.DoubleTensor(self.size):zero()
  local hashed_items = self:hash(word)

  for i = 1, #hashed_items do
    local token = hashed_items[i]
    local index = self._index[token]
    if index == nil then
      if self.unk_index == nil then
        -- was `.. w` (undefined global), which itself raised a nil-concat error
        error('Token not in HashVocabulary and no UNK token defined: ' .. word)
      else
        index = self.unk_index
      end
    end
    tensor[index] = tensor[index] + 1
  end
  return tensor
end

-- Returns the trigram stored at index i (1-based, bounds-checked).
function HashVocab:token(i)
  if i < 1 or i > self.size then
    error('Index ' .. i .. ' out of bounds')
  end
  return self._tokens[i]
end

-- Maps an array of indices to their trigrams.
function HashVocab:tokens(indeces)
  local output = {}
  for i = 1, #indeces do
    output[i] = self:token(indeces[i])
  end
  return output
end

-- Converts tokens including unknowns to a (len x size) DoubleTensor of
-- trigram counts, one row per token.
-- tokens: a table of strings
function HashVocab:map(tokens)
  assert(tokens ~= nil, "Tokens must not be null")
  local len = #tokens
  local output = torch.DoubleTensor(len, self.size)
  for i = 1, len do
    output[i] = self:index(tokens[i])
  end
  return output
end

-- Converts tokens excluding unknowns. A hashed vocabulary has no true OOV
-- tokens, so this is the same mapping as `map`.
function HashVocab:map_no_unk(tokens)
  assert(tokens ~= nil, "Tokens must not be null")
  -- was missing the `return`, silently yielding nil to every caller
  return self:map(tokens)
end

function HashVocab:add_pad_token()
  -- was guarded on self.unk_token, so any vocabulary that already contained
  -- an UNK token silently skipped adding the pad token
  if self.pad_index ~= nil then return end
  self.pad_index = self:add('')
end

function HashVocab:add_unk_token()
  if self.unk_token ~= nil then return end
  self.unk_index = self:add('')
end

function HashVocab:add_start_token()
  if self.start_token ~= nil then return end
  self.start_index = self:add('')
end

function HashVocab:add_end_token()
  if self.end_token ~= nil then return end
  self.end_index = self:add('')
end
-- ========================= /datasets/util/SparseHashVocab.lua =========================
--[[

A SparseHashVocabulary object. Initialized from a file with one SparseHashVocabulary token per line.
Maps between SparseHashVocabulary tokens and indices. If an UNK token is defined in the
HashVocabulary, returns the index to this token if queried for an out-of-HashVocabulary
token.

--]]

local SparseHashVocab = torch.class('datasets.SparseHashVocab')

-- Builds the trigram vocabulary from the tokens listed (one per line) in `path`.
-- add_unk: whether to register the special start/end/unk tokens afterwards.
function SparseHashVocab:__init(path, add_unk)
  assert(add_unk ~= nil, "Must determine whether to add unknown token or not")
  print("Loading SparseHashVocabulary from path " .. path)

  self.hashed = true
  self.size = 0
  self._index = {}
  self._tokens = {}

  -- Include special start symbol and end symbol
  local file = io.open(path)
  if file == nil then error("Error opening file " .. path .. "\n") end

  -- Gets all tokens from line via hashing and adds
  local function add_line(line)
    local hashed_items = self:hash(line)
    for i = 1, #hashed_items do
      self:add(hashed_items[i])
    end
  end

  local num_lines = 0
  while true do
    local line = file:read()
    if line == nil then break end
    -- count only real lines (was incremented before the nil check)
    num_lines = num_lines + 1
    add_line(line)
  end
  file:close()

  -- Detect special tokens that already exist in the vocabulary file.
  local unks = {'', '', 'UUUNKKK'}
  for _, tok in pairs(unks) do
    self.unk_index = self.unk_index or self._index[tok]
    if self.unk_index ~= nil then
      self.unk_token = tok
      break
    end
  end

  local starts = {'', ''}
  for _, tok in pairs(starts) do
    self.start_index = self.start_index or self._index[tok]
    if self.start_index ~= nil then
      self.start_token = tok
      break
    end
  end

  local ends = {'', ''}
  for _, tok in pairs(ends) do
    self.end_index = self.end_index or self._index[tok]
    if self.end_index ~= nil then
      self.end_token = tok
      break
    end
  end

  if add_unk then
    self:add_start_token()
    self:add_end_token()
    self:add_unk_token()
  end

  print("Loaded all the SparseHashVocabulary from " .. path .. " size is " .. self.size)
end

-- Hashes a word into its list of character trigrams over "#word#".
function SparseHashVocab:hash(w)
  -- previously leaked `hashed_items` and `padded_word` as globals
  local hashed_items = {}
  local padded_word = "#" .. w .. "#"
  for i = 1, #padded_word - 2 do
    table.insert(hashed_items, padded_word:sub(i, i + 2))
  end
  return hashed_items
end

-- Returns true iff trigram w is already in the vocabulary.
function SparseHashVocab:contains(w)
  return self._index[w] ~= nil
end

-- Adds trigram w, returning its (possibly pre-existing) index.
function SparseHashVocab:add(w)
  if self._index[w] ~= nil then
    return self._index[w]
  end
  self.size = self.size + 1
  self._tokens[self.size] = w
  self._index[w] = self.size
  return self.size
end

-- Returns the integer index of a single trigram, falling back to unk_index.
function SparseHashVocab:index(token, gpu_mode)
  assert(token ~= nil, "Must specify token SparseHashVocab needs to index")
  assert(gpu_mode ~= nil, "Must specify whether to use gpu mode or not")

  local index = self._index[token]
  if index == nil then
    if self.unk_index == nil then
      -- was `.. w` (undefined global), which itself raised a nil-concat error
      error('Token not in SparseHashVocabulary and no UNK token defined: ' .. token)
    else
      index = self.unk_index
    end
  end
  return index
end

-- Returns the trigram stored at index i (1-based, bounds-checked).
function SparseHashVocab:token(i)
  if i < 1 or i > self.size then
    error('Index ' .. i .. ' out of bounds')
  end
  return self._tokens[i]
end

-- Maps an array of indices to their trigrams.
function SparseHashVocab:tokens(indeces)
  local output = {}
  for i = 1, #indeces do
    output[i] = self:token(indeces[i])
  end
  return output
end

-- Converts tokens including unknowns to IntTensor if gpu_mode is null or false, CudaTensor if true
-- tokens: a table of strings
-- returns: {tensor of trigram indices for all tokens, tensor of per-token trigram counts}
function SparseHashVocab:map(tokens, gpu_mode)
  assert(tokens ~= nil, "Tokens must not be null")
  local cur_type = gpu_mode and torch.CudaTensor or torch.IntTensor
  local len = #tokens

  local num_chars = 0
  local word_lengths = cur_type(len)

  -- compute number of characters
  for i = 1, #tokens do
    num_chars = num_chars + #tokens[i]
    word_lengths[i] = #self:hash(tokens[i])
  end

  local chars = cur_type(num_chars)

  local cur_index = 1
  for i = 1, #tokens do
    local hashed_items = self:hash(tokens[i])
    for j = 1, #hashed_items do
      chars[cur_index] = self:index(hashed_items[j], gpu_mode)
      cur_index = cur_index + 1
    end
  end

  assert(chars:size(1) == word_lengths:sum(), "Number of chars and sum of word lengths must match up")

  return {chars, word_lengths}
end

-- Converts tokens excluding unknowns. A hashed vocabulary has no true OOV
-- tokens, so this is the same mapping as `map`.
-- gpu_mode defaults to false when omitted (kept optional for old callers).
function SparseHashVocab:map_no_unk(tokens, gpu_mode)
  assert(tokens ~= nil, "Tokens must not be null")
  -- was dropping the return value and omitting gpu_mode, so self:index's
  -- gpu_mode assert always fired on this path
  return self:map(tokens, gpu_mode or false)
end

function SparseHashVocab:add_unk_token()
  if self.unk_token ~= nil then return end
  self.unk_index = self:add('')
end

function SparseHashVocab:add_start_token()
  if self.start_token ~= nil then return end
  self.start_index =
self:add('')
end

function SparseHashVocab:add_end_token()
  if self.end_token ~= nil then return end
  self.end_index = self:add('')
end
-- ========================= /datasets/util/Vocab.lua =========================
--[[

A vocabulary object. Initialized from a file with one vocabulary token per line.
Maps between vocabulary tokens and indices. If an UNK token is defined in the
vocabulary, returns the index to this token if queried for an out-of-vocabulary
token.

--]]

local Vocab = torch.class('datasets.Vocab')

-- Builds the word vocabulary from the tokens listed (one per line) in `path`.
-- add_unk: whether to register the special start/end/pad/unk tokens afterwards.
function Vocab:__init(path, add_unk)
  assert(add_unk ~= nil)
  print("Loading vocabulary from path " .. path)

  self.hashed = false
  self.size = 0
  self._index = {}
  self._tokens = {}

  -- Include special start symbol and end symbol
  local file = io.open(path, 'r')
  if file == nil then error("Error opening file " .. path .. "\n") end

  while true do
    local line = file:read()
    if line == nil then break end
    self.size = self.size + 1
    self._tokens[self.size] = line
    self._index[line] = self.size
  end
  file:close()

  -- Detect special tokens that already exist in the vocabulary file.
  local unks = {'', '', 'UUUNKKK'}
  for _, tok in pairs(unks) do
    self.unk_index = self.unk_index or self._index[tok]
    if self.unk_index ~= nil then
      self.unk_token = tok
      break
    end
  end

  local starts = {'', ''}
  for _, tok in pairs(starts) do
    self.start_index = self.start_index or self._index[tok]
    if self.start_index ~= nil then
      self.start_token = tok
      break
    end
  end

  local ends = {'', ''}
  for _, tok in pairs(ends) do
    self.end_index = self.end_index or self._index[tok]
    if self.end_index ~= nil then
      self.end_token = tok
      break
    end
  end

  if add_unk then
    self:add_start_token()
    self:add_end_token()
    self:add_pad_token()
    self:add_unk_token()
  end

  print("Loaded all the vocabulary from " .. path .. " size is " .. self.size)
end

-- Returns true iff token w is in the vocabulary.
function Vocab:contains(w)
  return self._index[w] ~= nil
end

-- Adds token w, returning its (possibly pre-existing) index.
function Vocab:add(w)
  if self._index[w] ~= nil then
    return self._index[w]
  end
  self.size = self.size + 1
  self._tokens[self.size] = w
  self._index[w] = self.size
  return self.size
end

-- Returns the index of w, falling back to unk_index when defined.
function Vocab:index(w)
  local index = self._index[w]
  if index == nil then
    if self.unk_index == nil then
      error('Token not in vocabulary and no UNK token defined: ' .. w)
    end
    return self.unk_index
  end
  return index
end

-- Returns the token stored at index i (1-based, bounds-checked).
function Vocab:token(i)
  if i < 1 or i > self.size then
    error('Index ' .. i .. ' out of bounds')
  end
  return self._tokens[i]
end

-- Maps indices (a lua table, IntTensor or CudaTensor) to their tokens.
function Vocab:tokens(indeces)
  assert(indeces ~= nil, "Must specify indeces to predict on")
  local output = {}

  -- get length: first case is when it's a tensor, second when a table
  local len = (torch.typename(indeces) == 'torch.IntTensor' or
               torch.typename(indeces) == 'torch.CudaTensor')
              and indeces:size(1) or #indeces
  for i = 1, len do
    output[i] = self:token(indeces[i])
  end
  return output
end

-- Converts tokens to an IntTensor (CudaTensor in gpu mode) of indices.
-- Currently delegates to map_no_unk, i.e. UNK tokens are dropped.
function Vocab:map(tokens, gpu_mode)
  assert(tokens ~= nil, "Must specify tokens to map")
  assert(gpu_mode ~= nil, "Must specify gpu mode to use for mapping tokens")
  -- removed the superseded inline implementation that was left commented out
  return self:map_no_unk(tokens, gpu_mode)
end

-- Converts tokens to int indices, skipping any token that maps to UNK.
function Vocab:map_no_unk(tokens, gpu_mode)
  assert(tokens ~= nil, "Must specify tokens to map")
  assert(gpu_mode ~= nil, "Must specify gpu mode")

  local use_gpu = gpu_mode

  -- first pass: count the in-vocabulary tokens so the output can be sized
  -- exactly (removed a dead `has_unk` flag that referenced an undefined
  -- global counter, plus leftover debug prints)
  local len = #tokens
  for i = 1, #tokens do
    if self:index(tokens[i]) == self.unk_index then
      len = len - 1
    end
  end

  local output = use_gpu and torch.CudaTensor(len) or torch.IntTensor(len)
  local curr_index = 1
  for i = 1, #tokens do
    local index = self:index(tokens[i])
    if index ~= self.unk_index
then
      output[curr_index] = index
      curr_index = curr_index + 1
    end
  end
  return output
end

function Vocab:add_pad_token()
  -- was guarded on self.unk_token, so any vocabulary that already contained
  -- an UNK token silently skipped adding the pad token
  if self.pad_index ~= nil then return end
  self.pad_index = self:add('')
end

function Vocab:add_unk_token()
  if self.unk_token ~= nil then return end
  self.unk_index = self:add('')
end

function Vocab:add_start_token()
  if self.start_token ~= nil then return end
  self.start_index = self:add('')
end

function Vocab:add_end_token()
  if self.end_token ~= nil then return end
  self.end_index = self:add('')
end
-- ========================= /datasets/util/math.lua =========================
--[[

Various utility functions on tables

--]]

-- Returns the directory portion of `str` up to (and including) the last
-- `sep` (default '/'), or nil when `str` contains no separator.
function get_dir(str, sep)
  sep = sep or '/'
  return str:match("(.*" .. sep .. ")")
end

-- makes a directory if not found in path
function make_dir(path)
  local base_dir = get_dir(path)
  if lfs.attributes(base_dir) == nil then
    print("Directory not found for " .. path .. ", making new directory at " .. base_dir)
    lfs.mkdir(base_dir)
  end
end

-- Waits for s seconds.
-- NOTE(review): this busy-waits (burns a core) and os.time() only has
-- one-second resolution.
function sleep(s)
  local ntime = os.time() + s
  repeat until os.time() > ntime
end

-- Strips leading and trailing whitespace.
-- (parameter renamed from `string`, which shadowed the string library)
function trim(s)
  return (s:gsub("^%s*(.-)%s*$", "%1"))
end

-- Make a deep copy of a table (recurses into keys, values and metatables)
function deepcopy(orig)
  local orig_type = type(orig)
  local copy
  if orig_type == 'table' then
    copy = {}
    for orig_key, orig_value in next, orig, nil do
      copy[deepcopy(orig_key)] = deepcopy(orig_value)
    end
    setmetatable(copy, deepcopy(getmetatable(orig)))
  else -- number, string, boolean, etc
    copy = orig
  end
  return copy
end

-- Asserts that torch.typename(input) equals desired_type.
function check_type(input, desired_type)
  assert(input ~= nil, "Must specify input type for check_type function")
  assert(desired_type ~= nil, "Must specify desired type for check_type function")

  local input_type = torch.typename(input) or "NULL"
  assert(input_type == desired_type, "input has type " .. input_type .. " but desired is " .. desired_type)
end

-- returns a new zero unit (of cuda or cpu mode)
function new_zero_unit(gpu_mode, mem_dim)
  return gpu_mode and torch.zeros(mem_dim):cuda()
         or torch.zeros(mem_dim)
end

-- Asserts that `inputs` is the tensor type matching gpu_mode.
function check_valid_gpu_inputs(inputs, gpu_mode)
  assert(inputs ~= nil, "Must specify inputs to forward")
  assert(gpu_mode ~= nil, "Must specify whether to use gpu mode or not")

  local corr_type = gpu_mode and 'torch.CudaTensor' or 'torch.DoubleTensor'
  check_type(inputs, corr_type)
end

-- Returns zeroed {hidden, cell} initial state(s) for a (possibly stacked) lstm.
function new_hidden_activations_lstm(gpu_mode, mem_dim, num_layers)
  if num_layers == 1 then
    return {new_zero_unit(gpu_mode, mem_dim), new_zero_unit(gpu_mode, mem_dim)}
  else
    local modules = {{}, {}}
    for i = 1, num_layers do
      table.insert(modules[1], new_zero_unit(gpu_mode, mem_dim))
      table.insert(modules[2], new_zero_unit(gpu_mode, mem_dim))
    end
    return modules
  end
end

-- Enable dropouts
function enable_sequential_dropouts(model)
  for i, m in ipairs(model.modules) do
    if m.module_name == "nn.Dropout" or torch.typename(m) == "nn.Dropout" then
      m:training()
    end
  end
end

-- Disable dropouts
function disable_sequential_dropouts(model)
  for i, m in ipairs(model.modules) do
    if m.module_name == "nn.Dropout" or torch.typename(m) == "nn.Dropout" then
      m:evaluate()
    end
  end
end

-- adds modules into parallel network from module list
-- requires parallel_net is of type nn.parallel
-- requires module_list is an array of modules that is not null
-- modifies: parallel_net by adding modules into parallel net
function add_modules(parallel_net, module_list)
  assert(parallel_net ~= nil, "parallel net is null")
  assert(module_list ~= nil, "modules you're trying to add are null")

  for i = 1, #module_list do
    -- was an implicit global; keep it local to the loop
    local curr_module = module_list[i]
parallel_net:add(curr_module)
  end
end

-- appends every module of mod_list onto curr_table, preserving order
-- requires curr_table and mod_list are not null
-- modifies: curr_table
function insert_modules_to_table(curr_table, mod_list)
  assert(curr_table ~= nil)
  assert(mod_list ~= nil, "Module list must not be null")

  for i = 1, #mod_list do
    -- was an implicit global; keep it local to the loop
    local curr_module = mod_list[i]
    table.insert(curr_table, curr_module)
  end
end

-- Convert 1-d torch tensor to lua table
-- (the previous comment incorrectly claimed a 2-dimensional tensor)
function tensor_to_array(t1)
  local t2 = {}
  for i = 1, t1:size(1) do
    t2[i] = t1[i]
  end
  return t2
end
# ========================= /delete.sh =========================
#!/bin/bash

set -o errexit

echo "Removing exited docker containers..."
docker ps -a -f status=exited -q | xargs -r docker rm -v

echo "Removing dangling images..."
docker images --no-trunc -q -f dangling=true | xargs -r docker rmi

echo "Removing unused docker images"
images=($(docker images --digests | tail -n +2 | awk '{ img_id=$1; if($2!="")img_id=img_id":"$2; if($3!="") img_id=img_id"@"$3; print img_id}'))
containers=($(docker ps -a | tail -n +2 | awk '{print $2}'))

containers_reg=" ${containers[*]} "
remove=()

for item in ${images[@]}; do
  if [[ ! $containers_reg =~ " $item " ]]; then
    remove+=($item)
  fi
done

remove_images=" ${remove[*]} "

echo ${remove_images} | xargs -r docker rmi
echo "Done"

# NOTE(review): with `set -o errexit` both lines below abort the script when
# no containers exist (docker is called with zero arguments) -- confirm intent.
docker stop $(docker ps -a -q)
# was `$(sudo docker ps -a -q)`: the rest of the script runs docker
# unprivileged, so the stray sudo listed a (potentially different) root
# docker context and could prompt for a password mid-script
docker kill $(docker ps -a -q)
# ========================= /dmn/README.md =========================
# DynamicMemoryNetworks
-- ========================= /dmn/init.lua =========================
require('torch')
require('nn')
require('nngraph')
require('optim')
require('xlua')
require('sys')
require('lfs')
require('io')
require('json')
require('gnuplot')

package.path = package.path .. ';/Users/david/.luarocks/share/lua/5.1/?.lua;/Users/david/.luarocks/share/lua/5.1/?/init.lua;/Users/david/torch/install/share/lua/5.1/?.lua;/Users/david/torch/install/share/lua/5.1/?/init.lua;./?.lua;/Users/david/torch/install/share/luajit-2.1.0-alpha/?.lua;/usr/local/share/lua/5.1/?.lua;/usr/local/share/lua/5.1/?/init.lua'
  .. ";/Users/David/Desktop/nlp/deeplearning/softmax/?/init.lua"
  .. ";/Users/David/Desktop/softmax/?/init.lua"
  .. ";/home/ubuntu/softmax/?/init.lua;"
  .. "/softmax/?/init.lua"
  .. ";/home/david/Desktop/softmax/?/init.lua;"

-- image processing and async libraries
lua_json = include('../opensource/json.lua')
async = require('async')
require('image')
gm = require('graphicsmagick')

print("Loading all DMN models")
package.path = package.path .. ';../dmn/?'

local num_threads = 6
torch.setnumthreads(num_threads)
-- for dataset processing utils
dmn = {}

dmn.dummy_path = 'data/QA/vocab.txt'
dmn.models_dir = 'trained_models/'
dmn.predictions_dir = 'predictions/'

-- NOTE: the include order below is load-order-sensitive -- base classes and
-- utilities must be defined before the files that subclass or call them.

-- Utility functions for networks (Composite patterns)
include('util/functions.lua')

-- Utility constants
include('util/constants.lua')

-- Utility IO functions (loading images etc)
include('util/io_functions.lua')

-- Utility math functions (calculating mean, etc)
include('util/math_functions.lua')

-- Utility evaluate functions (for lua)
include('util/eval_functions.lua')

-- Logging functions for networks (Logging on local/cloud)
include('util/logger.lua')
include('util/print_logger.lua')

-- global logger instance; all modules report through dmn.logger
dmn.logger = dmn.Logger()
dmn.logger:add_logger(dmn.PrintLogger())

-- For squeeze unit
include('models/nn_units/Squeeze.lua')

-- For optimizer
include('models/nn_units/Optim.lua')


-- For attention units
include('models/nn_units/CRowAddTable.lua')
include('models/nn_units/SmoothCosineSimilarity.lua')
include('models/nn_units/JoinTable.lua')
include('models/nn_units/PaddedJoinTable.lua')
include('models/nn_units/SpatialCrossLRN.lua')
include('models/nn_units/Linear.lua')

-- change linear to point to correct table
--nn.Linear = dmn.Linear

-- For all the rnn units
include('models/rnn_units/units.lua')

-- Utility functions for lstm units
include('models/RNN_Utils.lua')

-- Recurrent models
include('models/Attention_LSTM_Decoder.lua')
include('models/LSTM_Decoder.lua')

-- Deep semantic similarity network
include('models/DSSM_Layer.lua')

-- Input models (InputLayer is the base class of the others)
include('input_module/input_layers/InputLayer.lua')
include('input_module/input_layers/BOWLayer.lua')
include('input_module/input_layers/EmbedLayer.lua')
include('input_module/input_layers/HashLayer.lua')
include('input_module/input_layers/SparseHashLayer.lua')
include('input_module/input_layers/FastHashLayer.lua')

-- Hidden models (HiddenLayer is the base class of the others)
include('input_module/hidden_layers/HiddenLayer.lua')
include('input_module/hidden_layers/HiddenDummyLayer.lua')
include('input_module/hidden_layers/HiddenIdentityLayer.lua')
include('input_module/hidden_layers/HiddenProjLayer.lua')
include('input_module/hidden_layers/HiddenGRUProjLayer.lua')

-- Answer reranking module
include('answer_module/AnswerRerankModule.lua')

-- Semantic memory module
include('semantic_memory_module/WordEmbedModule.lua')

-- Attention Network with DSSMs
include('dmn_network/Attention_Network.lua')

printf = utils.printf

-- For data
require('../datasets')

-- For trainers
require('../apis')

-- Check python if it exists (optional fb.python bridge for image loading)
if dmn.functions.module_exists('fb.python') then
  python = {}

  python.py = require('fb.python')
  python.np = python.py.import("numpy")
  python.pil_image = python.py.import("PIL.Image")

  -- To load any image (disables PIL's decompression-bomb size limit)
  python.py.exec([=[
import PIL.Image as Image
Image.MAX_IMAGE_PIXELS = None
]=])

end

-- share parameters of nngraph gModule instances
-- cell: the gModule whose modules should share storage with src's
-- src: the gModule owning the master parameters; ... is forwarded to :share
function share_params(cell, src, ...)
  for i = 1, #cell.forwardnodes do
    local node = cell.forwardnodes[i]
    if node.data.module then
      --print(node.data.module)
      node.data.module:share(src.forwardnodes[i].data.module, ...)
    end
  end
end

-- Prints s framed by two 80-dash rules via the global dmn logger.
function header(s)
  dmn.logger:print(string.rep('-', 80))
  dmn.logger:print(s)
  dmn.logger:print(string.rep('-', 80))
end

print("Done loading modules for dynamic memory network")

-- ========================= /dmn/input_module/hidden_layers/HiddenDummyLayer.lua =========================
--[[

Hidden Dummy Layer: Just feeds in zeros
--]]

local HiddenDummyLayer, parent = torch.class('dmn.HiddenDummyLayer', 'dmn.HiddenLayer')

function HiddenDummyLayer:__init(config)
  parent.__init(self, config)
  -- precompute the constant zero state returned by every forward call
  self.hidden_init = self:new_hidden_activations()
end

-- Builds the zero hidden state: one {hidden, cell} pair for a single layer.
function HiddenDummyLayer:new_hidden_activations()
  if self.num_layers == 1 then
    return {torch.zeros(self.proj_dim), torch.zeros(self.proj_dim)}
  else
    local modules = {}
    for i = 1, self.num_layers do
      -- NOTE(review): two identical inserts per layer yields 2*num_layers
      -- entries; new_hidden_activations_lstm in util/math.lua uses a
      -- different {{hiddens},{cells}} layout -- confirm which shape the
      -- consumer expects.
      table.insert(modules, {torch.zeros(self.proj_dim), torch.zeros(self.proj_dim)})
      table.insert(modules, {torch.zeros(self.proj_dim), torch.zeros(self.proj_dim)})
    end
    return modules
  end
end

-- Returns all of the weights of this module (dummy layer has none)
function HiddenDummyLayer:getWeights()
end

-- No trainable sub-modules.
function HiddenDummyLayer:getModules()
  return {}
end

-- Sets gpu mode (no-op: no tensors to move besides the cached zeros)
function HiddenDummyLayer:set_gpu_mode()
end

function HiddenDummyLayer:set_cpu_mode()
end

-- Enable Dropouts (no-op)
function HiddenDummyLayer:enable_dropouts()
end

-- Disable Dropouts (no-op)
function HiddenDummyLayer:disable_dropouts()
end

-- Forward step: ignores input_vals and returns the cached zero state.
-- NOTE(review): all callers share the same tensors; safe only if downstream
-- modules never mutate their hidden-state inputs in place.
function HiddenDummyLayer:forward(input_vals)
  return self.hidden_init
end

-- Does a single backward step of project layer (no-op: nothing to learn)
-- input_vals: input into hidden projection error
-- cell_errors: error of all hidden, cell units of lstm with respect to input
function HiddenDummyLayer:backward(input_vals, cell_errors)
end

-- Returns size of outputs of this combine module (undefined for the dummy)
function HiddenDummyLayer:getOutputSize()
end

function HiddenDummyLayer:getParameters()
end

-- zeros out the gradients (no-op)
function HiddenDummyLayer:zeroGradParameters()
end

function HiddenDummyLayer:normalizeGrads(batch_size)
end

-- ========================= /dmn/input_module/hidden_layers/HiddenGRUProjLayer.lua =========================
--[[

Hidden Project Layer: Projects image input into projection dimension twice. For feeding in
image input into lstm
--]]

local HiddenProjLayer, parent = torch.class('dmn.HiddenGRUProjLayer', 'dmn.HiddenLayer')

-- config: mem_dim, input_dim, dropout_prob, num_layers, dropout, gpu_mode
function HiddenProjLayer:__init(config)
  assert(config.mem_dim ~= nil, "Must specify memory dimensions")
  assert(config.input_dim ~= nil, "Must specify input dimensions")
  assert(config.dropout_prob ~= nil, "Must specify dropout probability")
  assert(config.num_layers ~= nil, "Must specify number of layers")
  assert(config.dropout ~= nil, "Must specify dropout")

  self.gpu_mode = config.gpu_mode
  self.input_dim = config.input_dim
  self.proj_dim = config.mem_dim
  self.dropout_prob = config.dropout_prob
  self.dropout = config.dropout
  self.num_layers = config.num_layers

  local modules = nn.Parallel()
  -- image feature embedding: one projection per layer
  if self.num_layers == 1 then
    local hidden_emb = self:new_hidden_module()
    self.hidden_emb = hidden_emb
    modules:add(self.hidden_emb)

  else
    self.hidden_emb = {}
    for i = 1, self.num_layers do
      local hidden_emb = self:new_hidden_module()
table.insert(self.hidden_emb, hidden_emb) 35 | 36 | modules:add(self.hidden_emb[i]) 37 | end 38 | end 39 | 40 | 41 | if gpu_mode then 42 | self:set_gpu_mode() 43 | end 44 | end 45 | 46 | function HiddenProjLayer:new_hidden_module() 47 | local hidden_emb = nn.Sequential() 48 | :add(nn.Linear(self.input_dim, self.proj_dim)) 49 | 50 | if self.dropout then 51 | hidden_emb:add(nn.Dropout(self.dropout_prob, false)) 52 | end 53 | return hidden_emb 54 | end 55 | 56 | -- Returns all of the weights of this module 57 | function HiddenProjLayer:getWeights() 58 | return self.params 59 | end 60 | 61 | function HiddenProjLayer:getModules() 62 | if self.num_layers == 1 then 63 | return {self.hidden_emb} 64 | else 65 | local modules = {} 66 | for i = 1, self.num_layers do 67 | table.insert(modules, self.hidden_emb[i]) 68 | end 69 | return modules 70 | end 71 | end 72 | 73 | -- Sets gpu mode 74 | function HiddenProjLayer:set_gpu_mode() 75 | self.gpu_mode = true 76 | if self.num_layers == 1 then 77 | self.hidden_emb:cuda() 78 | else 79 | for i = 1, self.num_layers do 80 | self.hidden_emb[i]:cuda() 81 | end 82 | end 83 | end 84 | 85 | function HiddenProjLayer:set_cpu_mode() 86 | self.gpu_mode = false 87 | if self.num_layers == 1 then 88 | self.hidden_emb:double() 89 | else 90 | for i = 1, self.num_layers do 91 | self.hidden_emb[i]:double() 92 | end 93 | end 94 | end 95 | 96 | -- Enable Dropouts 97 | function HiddenProjLayer:enable_dropouts() 98 | if self.num_layers == 1 then 99 | enable_sequential_dropouts(self.hidden_emb) 100 | else 101 | for i = 1, self.num_layers do 102 | enable_sequential_dropouts(self.hidden_emb[i]) 103 | end 104 | end 105 | end 106 | 107 | -- Disable Dropouts 108 | function HiddenProjLayer:disable_dropouts() 109 | if self.num_layers == 1 then 110 | disable_sequential_dropouts(self.hidden_emb) 111 | else 112 | for i = 1, self.num_layers do 113 | disable_sequential_dropouts(self.hidden_emb[i]) 114 | end 115 | end 116 | end 117 | 118 | -- Does a single forward step 
of concat layer, concatenating 119 | -- 120 | function HiddenProjLayer:forward(inputs) 121 | assert(inputs ~= nil, "Hidden inputs are null") 122 | local ndim = inputs:dim() 123 | 124 | assert(inputs:size(ndim) == self.input_dim, 125 | "Dimension mismatch on hidden inputs " .. " expected " .. self.input_dim) 126 | parent:forward(inputs, self.gpu_mode) 127 | 128 | if self.num_layers == 1 then 129 | self.hidden_image_proj = self.hidden_emb:forward(inputs) 130 | return self.hidden_image_proj 131 | else 132 | local hidden_vals = {} 133 | 134 | for i = 1, self.num_layers do 135 | local hidden_image_proj = self.hidden_emb[i]:forward(inputs) 136 | table.insert(hidden_vals, hidden_image_proj) 137 | end 138 | 139 | return hidden_vals 140 | end 141 | 142 | end 143 | 144 | -- Does a single backward step of project layer 145 | -- inputs: input into hidden projection error 146 | -- cell_errors: error of all hidden, cell units of lstm with respect to input 147 | function HiddenProjLayer:backward(inputs, cell_errors) 148 | assert(inputs ~= nil) 149 | assert(inputs:size(inputs:dim()) == self.input_dim, 150 | "Dimension mismatch on hidden inputs " .. " expected " 151 | .. 
self.input_dim) 152 | assert(cell_errors ~= nil) 153 | 154 | local input_errors = torch.zeros(inputs:size()) 155 | if self.num_layers == 1 then 156 | -- get the image and word projection errors 157 | local hidden_emb_errors = self.hidden_emb:backward(inputs, cell_errors) 158 | input_errors = hidden_emb_errors 159 | else 160 | for i = 1, self.num_layers do 161 | -- get the image and word projection errors 162 | local hidden_emb_errors = cell_errors[i] 163 | 164 | -- feed them backward 165 | local hidden_input_errors = self.hidden_emb[i]:backward(inputs, hidden_emb_errors) 166 | input_errors = hidden_input_errors + input_errors 167 | end 168 | end 169 | return input_errors 170 | end 171 | 172 | -- Returns size of outputs of this combine module 173 | function HiddenProjLayer:getOutputSize() 174 | return self.mem_dim 175 | end 176 | 177 | function HiddenProjLayer:getParameters() 178 | return self.params, self.grad_params 179 | end 180 | 181 | -- zeros out the gradients 182 | function HiddenProjLayer:zeroGradParameters() 183 | if self.num_layers == 1 then 184 | self.cell_emb:zeroGradParameters() 185 | self.hidden_emb:zeroGradParameters() 186 | else 187 | for i = 1, self.num_layers do 188 | self.cell_emb[i]:zeroGradParameters() 189 | self.hidden_emb[i]:zeroGradParameters() 190 | end 191 | end 192 | end 193 | 194 | function HiddenProjLayer:normalizeGrads(batch_size) 195 | assert(batch_size ~= nil) 196 | if self.num_layers == 1 then 197 | self.cell_emb.gradWeight:div(batch_size) 198 | self.hidden_emb.gradWeight:div(batch_size) 199 | else 200 | for i = 1, self.num_layers do 201 | self.cell_emb[i].gradWeight:div(batch_size) 202 | self.hidden_emb[i].gradWeight:div(batch_size) 203 | end 204 | end 205 | end 206 | 207 | -------------------------------------------------------------------------------- /dmn/input_module/hidden_layers/HiddenIdentityLayer.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Hidden Identity Layer: 
Projects image input into projection dimension twice. For feeding in
image input into lstm
--]]

local HiddenIdentityLayer, parent = torch.class('dmn.HiddenIdentityLayer', 'dmn.HiddenLayer')

-- Pass-through hidden layer: forwards its input unchanged and has no
-- trainable parameters of its own.
function HiddenIdentityLayer:__init(config)
  parent.__init(self, config)
end

-- No internal nn modules: identity has nothing to train or transfer.
function HiddenIdentityLayer:getModules()
  return {}
end

-- Flip the gpu flag; no tensors to migrate.
function HiddenIdentityLayer:set_gpu_mode()
  self.gpu_mode = true
end

function HiddenIdentityLayer:set_cpu_mode()
  self.gpu_mode = false
end

-- Dropout toggles are no-ops: there is nothing to drop out.
function HiddenIdentityLayer:enable_dropouts()
end

function HiddenIdentityLayer:disable_dropouts()
end

-- Forward pass: the identity — hand the inputs straight back.
function HiddenIdentityLayer:forward(inputs)
  return inputs
end

-- Backward pass: gradient of the identity is the incoming error itself.
-- inputs: the tensors given to forward
-- cell_errors: upstream error w.r.t. the lstm hidden/cell units
function HiddenIdentityLayer:backward(inputs, cell_errors)
  assert(inputs ~= nil)
  assert(cell_errors ~= nil)
  -- parent performs gpu-mode type checking only
  parent:backward(inputs, cell_errors, self.gpu_mode)

  return cell_errors
end

-- Output size reported to callers: one mem_dim slice per layer.
function HiddenIdentityLayer:getOutputSize()
  return self.mem_dim * self.num_layers
end

-- No parameters to expose.
function HiddenIdentityLayer:getParameters()
  return {}
end

-- No gradients to clear.
function HiddenIdentityLayer:zeroGradParameters()
end

-- No gradients to normalize.
function HiddenIdentityLayer:normalizeGrads(batch_size)
end

-------------------------------------------------------------------------------- /dmn/input_module/hidden_layers/HiddenLayer.lua: --------------------------------------------------------------------------------
--[[

Hidden 
Layer base class

--]]

local HiddenLayer = torch.class('dmn.HiddenLayer')

-- Abstract base class for hidden-state layers. Validates and copies the
-- shared configuration; concrete subclasses implement the methods below.
function HiddenLayer:__init(config)
  assert(config.mem_dim ~= nil, "Must specify memory dimensions")
  assert(config.input_dim ~= nil, "Must specify input dimensions")
  assert(config.dropout_prob ~= nil, "Must specify dropout probability")
  assert(config.num_layers ~= nil, "Must specify number of layers")
  assert(config.dropout ~= nil, "Must specify dropout")

  self.gpu_mode = config.gpu_mode
  self.num_layers = config.num_layers
  self.input_dim = config.input_dim
  self.proj_dim = config.mem_dim
  self.dropout = config.dropout
  self.dropout_prob = config.dropout_prob
end

-- Returns all of the weights of this module (abstract)
function HiddenLayer:getWeights()
  error("Get weights not implemented!")
end

-- Returns all the nn modules of this layer as an array (abstract)
function HiddenLayer:getModules()
  error("Get modules not implemented!")
end

-- Moves the layer to the gpu (abstract)
function HiddenLayer:set_gpu_mode()
  error("Set gpu mode not implemented!")
end

-- Moves the layer back to the cpu (abstract)
function HiddenLayer:set_cpu_mode()
  error("Set cpu mode not implemented!")
end

-- Turns dropout on (abstract)
function HiddenLayer:enable_dropouts()
  error("Enable dropouts not implemented!")
end

-- Turns dropout off (abstract)
function HiddenLayer:disable_dropouts()
  error("Disable dropouts not implemented!")
end

-- Shared forward-pass validation: checks that inputs are of the tensor
-- type matching gpu_mode. Subclasses call this before projecting inputs
-- into the lstm hidden state (first element cell state, second hidden).
function HiddenLayer:forward(inputs, gpu_mode)
  assert(inputs ~= nil)
  assert(gpu_mode ~= nil)
  local cuda_type
  if gpu_mode then
    cuda_type = 'torch.CudaTensor'
  else
    cuda_type = 'torch.DoubleTensor'
  end
  check_type(inputs, cuda_type)
end

-- Shared backward-pass validation. cell_errors holds the error w.r.t.
-- the lstm cell inputs first and the hidden inputs second.
function HiddenLayer:backward(inputs, cell_errors, gpu_mode)
  assert(inputs ~= nil)
  assert(gpu_mode ~= nil)
  local cuda_type
  if gpu_mode then
    cuda_type = 'torch.CudaTensor'
  else
    cuda_type = 'torch.DoubleTensor'
  end
  check_type(inputs, cuda_type)
end

-- Returns size of outputs of this hidden module (abstract)
function HiddenLayer:getOutputSize()
  error("Get output size not implemented!")
end

-- Returns parameters and gradients of this model (abstract)
function HiddenLayer:getParameters()
  error("Get parameters not implemented!")
end

-- zeros out the gradients (abstract)
function HiddenLayer:zeroGradParameters()
  error("Zero grad parameters not implemented!")
end

-- Normalizes gradients by batch size (abstract)
function HiddenLayer:normalizeGrads(batch_size)
  error("Normalize gradients not implemented!")
end

-------------------------------------------------------------------------------- /dmn/input_module/input_layers/BOWLayer.lua: --------------------------------------------------------------------------------
--[[

BOW Layer: Represents a sentence as a sum of bag of words of its embeddings

--]]

local BOWLayer, parent = torch.class('dmn.BOWLayer', 'dmn.InputLayer')

function BOWLayer:__init(config)
  parent.__init(self, config)

  -- create master embed cell
  self.emb = self:new_emb_table()

  -- transfer them to 
gpu mode
  if self.gpu_mode then
    self:set_gpu_mode()
  end

  -- get their parameters
end

-- No per-timestep state is cached, so there is nothing to forget.
function BOWLayer:forget()
end

-- Builds a LookupTable -> Sum(1) sequential implementing the
-- bag-of-words encoding, optionally followed by dropout. Weights are
-- shared with self.emb when the master table already exists.
function BOWLayer:new_emb_table()
  local emb_table = nn.LookupTable(self.vocab_size, self.emb_dim)

  -- Copy embedding weights
  if self.emb_vecs ~= nil then
    print("Initializing embeddings from config ")
    -- BUGFIX: 'config' is not in scope in this method (it was an __init
    -- parameter); the vectors are stored on self by InputLayer.__init.
    emb_table.weight:copy(self.emb_vecs)
  end

  local emb = nn.Sequential()
    :add(emb_table)
    :add(nn.Sum(1))

  if self.dropout then
    print("Adding dropout to embed layer")
    emb:add(nn.Dropout(self.dropout_prob, false))
  end

  -- share parameters if needed
  if self.gpu_mode then
    print("CUDA BABY")
    emb:cuda()
  end

  if self.emb ~= nil then
    emb:share(self.emb, 'weight', 'bias', 'gradWeight', 'gradBias')
  end
  return emb
end

-- Returns all of the weights of this module
function BOWLayer:getWeights()
  return self.params
end

-- Sets gpu mode
function BOWLayer:set_gpu_mode()
  self.emb:cuda()
  self.gpu_mode = true
end

function BOWLayer:set_cpu_mode()
  self.emb:double()
  self.gpu_mode = false
end

-- Enable Dropouts
function BOWLayer:enable_dropouts()
  enable_sequential_dropouts(self.emb)
end

-- Disable Dropouts
function BOWLayer:disable_dropouts()
  disable_sequential_dropouts(self.emb)
end

-- Forward step: sums the embeddings of the given word indices into a
-- single emb_dim bag-of-words vector.
function BOWLayer:forward(sentence_rep)
  assert(sentence_rep ~= nil, "Must specify word tokens")

  self.word_proj = self.emb:forward(sentence_rep)

  return self.word_proj
end

-- Backward step: propagates err through the embedding sum and returns
-- the gradient with respect to sentence_rep.
function BOWLayer:backward(sentence_rep, err)
  assert(sentence_rep ~= nil, "Must specify word tokens")
  assert(err ~= nil, "Must specify error with respect to gradient output")

  self.errors = self.emb:backward(sentence_rep, err)

  return self.errors
end

-- Shares embedding parameters with another BOWLayer and sanity-checks
-- that both layers now produce identical outputs.
function BOWLayer:share_params(other, ...)
  assert(other ~= nil, "Must specify other BOWLayer to share params with")
  print("Sharing BOW HashLayer")
  self.emb:share(other.emb, ...)

  local cur_type = self.gpu_mode and torch.CudaTensor or torch.IntTensor

  local input = cur_type{1, 2}
  local test = self:forward(input)
  local test1 = other:forward(input)
  local diff = test - test1
  assert(torch.sum(diff) == 0, "Parameters aren't shared")
end

-- Returns size of outputs of this combine module
function BOWLayer:getOutputSize()
  return self.emb_dim
end

function BOWLayer:getParameters()
  return self.params, self.grad_params
end

-- zeros out the gradients
function BOWLayer:zeroGradParameters()
  self.emb:zeroGradParameters()
end

function BOWLayer:getModules()
  return {self.emb}
end

-- Shares parameters between this embed layer and other layer
function BOWLayer:share(other, ...)
  self:share_params(other, ...)
end

-- Normalizes gradients by batch size (gradient averaging)
function BOWLayer:normalizeGrads(batch_size)
  self.emb.gradWeight:div(batch_size)
end

-- Pretty-prints this layer's configuration
function BOWLayer:print_config()
  printf('%-25s = %s\n', 'gpu mode', self.config.gpu_mode)
  printf('%-25s = %s\n', 'use dropout', self.config.dropout)
  printf('%-25s = %d\n', 'embed dimension', self.config.emb_dim)
  printf('%-25s = %d\n', 'number of classes', self.config.num_classes)
  printf('%-25s = %f\n', 'input dropout probability', self.config.dropout_prob)
end

-------------------------------------------------------------------------------- /dmn/input_module/input_layers/EmbedLayer.lua: --------------------------------------------------------------------------------
--[[

Embed layer: Simple word embedding layer for input into lstm

--]]

local EmbedLayer, parent = torch.class('dmn.EmbedLayer', 'dmn.InputLayer')

function EmbedLayer:__init(config)
  parent.__init(self, config)
  self.config = dmn.functions.deepcopy(config)
  self.gpu_mode = self.config.gpu_mode
  self.emb_table = nn.LookupTable(self.vocab_size, self.emb_dim)
  -- Copy embedding weights ('config' is a parameter here, so this is safe)
  if config.emb_vecs ~= nil then
    print("Initializing embeddings from config ")
    self.emb_table.weight:copy(config.emb_vecs)
  end

  self.emb = nn.Sequential()
    :add(self.emb_table)

  if self.dropout then
    print("Adding dropout to embed layer")
    self.emb:add(nn.Dropout(self.dropout_prob, false))
  end

  if self.gpu_mode then
    self:set_gpu_mode()
  end
end

-- Returns all of the weights of this module
function EmbedLayer:getWeights()
  return self.params
end

-- Sets gpu mode
function EmbedLayer:set_gpu_mode()
  dmn.logger:print("Setting GPU mode on embed layer")
  self.emb:cuda()
  self.emb_table:cuda()
  self.gpu_mode = true
  print("GPU MODE FOR EMBED 
LAYER IS " .. tostring(self.gpu_mode))
end

-- Moves the embedding tables back to the cpu.
function EmbedLayer:set_cpu_mode()
  dmn.logger:print("Setting CPU mode on embed layer")
  self.emb:double()
  self.emb_table:double()
  self.gpu_mode = false
end

-- Turn dropout on for the embedding sequential.
function EmbedLayer:enable_dropouts()
  enable_sequential_dropouts(self.emb)
end

-- Turn dropout off for the embedding sequential.
function EmbedLayer:disable_dropouts()
  disable_sequential_dropouts(self.emb)
end

-- Forward step: looks up embeddings for the given word indices.
-- Parent performs gpu-mode type checking on the indices.
function EmbedLayer:forward(word_indeces)
  assert(word_indeces ~= nil, "Must specify word indeces to forward")
  parent:forward(word_indeces, self.gpu_mode)
  self.word_proj = self.emb:forward(word_indeces)
  return self.word_proj
end

-- Backward step: propagates err through the lookup table and returns
-- the resulting gradient.
function EmbedLayer:backward(word_indices, err)

  parent:backward(word_indices, err, self.gpu_mode)
  local emb_err = self.emb:backward(word_indices, err)
  return emb_err
end

-- Shares the lookup-table parameters with another EmbedLayer, then
-- sanity-checks that both layers produce identical outputs.
function EmbedLayer:share_params(other, ...)
  assert(other ~= nil, "Must specify other layer to share params with")

  self.emb_table:share(other.emb_table, ...)

  -- sanity check: make sure you get same outputs on forwarding
  local input
  if self.gpu_mode then
    input = torch.CudaTensor{1, 2}
  else
    input = torch.IntTensor{1, 2}
  end
  local test = self:forward(input)
  local test1 = other:forward(input)
  local diff = test - test1
  assert(torch.sum(diff) == 0, "Parameters aren't shared")
end
-- Size of the vectors this layer emits.
function EmbedLayer:getOutputSize()
  return self.emb_dim
end

-- Parameters and their gradients.
function EmbedLayer:getParameters()
  return self.params, self.grad_params
end

-- Clears accumulated gradients.
function EmbedLayer:zeroGradParameters()
  self.emb:zeroGradParameters()
end

-- The nn modules owned by this layer.
function EmbedLayer:getModules()
  return {self.emb}
end

-- Gradient averaging over a batch.
function EmbedLayer:normalizeGrads(batch_size)
  self.emb.gradWeight:div(batch_size)
end

-- Dumps this layer's configuration to stdout.
function EmbedLayer:print_config()
  printf('%-25s = %s\n', 'gpu mode', self.config.gpu_mode)
  printf('%-25s = %s\n', 'use dropout', self.config.dropout)
  printf('%-25s = %d\n', 'embed dimension', self.config.emb_dim)
  printf('%-25s = %d\n', 'number of classes', self.config.num_classes)
  printf('%-25s = %f\n', 'input dropout probability', self.config.dropout_prob)
end

-------------------------------------------------------------------------------- /dmn/input_module/input_layers/FastHashLayer.lua: --------------------------------------------------------------------------------
--[[

Fast Hash layer: Trigram word hashing layer to create word embeddings but in sparse representation
to save memory: represent words as indices in the word hashing layer and use a lookup table
plus sum layer

--]]

local FastHashLayer, parent = torch.class('dmn.FastHashLayer', 'dmn.InputLayer')

function FastHashLayer:__init(config)
  parent.__init(self, config)

  -- create master embed cell
  
self.emb = self:new_emb_table()
  self.join_layer = nn.JoinTable(1)

  -- create array of children cells
  self.emb_arr = {}

  -- transfer them to gpu mode
  if self.gpu_mode then
    self:set_gpu_mode()
  end

  -- get their parameters
end

-- Builds the shared LookupTable sequential used for hashed character
-- embeddings, optionally with dropout; shares weights with self.emb
-- when the master table already exists.
function FastHashLayer:new_emb_table()
  local emb_table = nn.LookupTable(self.vocab_size, self.emb_dim)

  -- Copy embedding weights
  if self.emb_vecs ~= nil then
    dmn.logger:print("Initializing embeddings from config ")
    -- BUGFIX: 'config' is not in scope in this method; the vectors are
    -- stored on self by InputLayer.__init.
    emb_table.weight:copy(self.emb_vecs)
  end

  local emb = nn.Sequential()
    :add(emb_table)

  if self.dropout then
    dmn.logger:print("Adding dropout to embed layer")
    emb:add(nn.Dropout(self.dropout_prob, false))
  end

  -- share parameters if needed
  if self.gpu_mode then
    dmn.logger:print("CUDA BABY")
    emb:cuda()
  end

  if self.emb ~= nil then
    emb:share(self.emb, 'weight', 'bias', 'gradWeight', 'gradBias')
  end
  return emb
end

-- Returns all of the weights of this module
function FastHashLayer:getWeights()
  return self.params
end

-- Sets gpu mode
function FastHashLayer:set_gpu_mode()
  self.emb:cuda()
  self.gpu_mode = true
end

function FastHashLayer:set_cpu_mode()
  self.emb:double()
  self.gpu_mode = false
end

-- Enable Dropouts
function FastHashLayer:enable_dropouts()
  enable_sequential_dropouts(self.emb)
end

-- Disable Dropouts
function FastHashLayer:disable_dropouts()
  disable_sequential_dropouts(self.emb)
end

-- Forward step. hashed_rep is {char_indeces, word_lengths}: a flat
-- tensor of hashed character indices plus per-word run lengths. Embeds
-- all indices in one lookup, then sums each word's run into a single
-- emb_dim vector (bag-of-characters per word).
function FastHashLayer:forward(hashed_rep)
  assert(hashed_rep ~= nil, "Must specify word tokens")

  local char_indeces, word_lengths = unpack(hashed_rep)

  -- get number of tokens in the sentence
  local sentence_length = word_lengths:size(1)
  self.word_proj = self.gpu_mode and torch.CudaTensor(sentence_length, self.emb_dim)
          or torch.DoubleTensor(sentence_length, self.emb_dim)

  self.joined_vectors = char_indeces
  -- first get character/hashed embeddings
  self.char_embeddings = self.emb:forward(self.joined_vectors)

  local cur_index = 1

  for i = 1, sentence_length do
    local cur_word_length = word_lengths[i]
    local cur_embeddings = self.char_embeddings[{{cur_index, cur_index + cur_word_length - 1}}]

    -- sum up embeddings == bag of words representation
    self.word_proj[i] = torch.squeeze(cur_embeddings:sum(1))
    cur_index = cur_index + cur_word_length
  end

  return self.word_proj
end

-- Backward step: since forward sums per-word embeddings, the gradient
-- of each character index is just the gradient of its word.
function FastHashLayer:backward(hashed_rep, err)
  assert(hashed_rep ~= nil, "Must specify word tokens")
  assert(err ~= nil, "Must specify error with respect to gradient output")


  local char_indeces, word_lengths = unpack(hashed_rep)

  -- get number of tokens in the sentence
  local sentence_length = word_lengths:size(1)

  -- expand per-word errors back out to per-character errors
  self.errors = self.char_embeddings:new()
  local cur_index = 1
  for i = 1, sentence_length do
    -- since we sum up our gradient of input is just gradient of output
    local cur_word_length = word_lengths[i]

    for j = cur_index, cur_index + cur_word_length - 1 do
      self.errors[j] = err[i]
    end

    -- update word index
    cur_index = cur_index + cur_word_length
  end

  local errs = self.emb:backward(self.joined_vectors, self.errors)
  return errs
end

-- Shares embedding parameters with another FastHashLayer and verifies
-- both layers produce identical outputs afterwards.
function FastHashLayer:share_params(other, ...)
  assert(other ~= nil, "Must specify other FastHashLayer to share params with")
  dmn.logger:print("Sharing Sparse HashLayer")
  self.emb:share(other.emb, ...)

  local cur_type = self.gpu_mode and torch.CudaTensor or torch.IntTensor

  local input = {cur_type{1, 2}, cur_type{1}}
  local test = self:forward(input)
  local test1 = other:forward(input)
  local diff = test - test1
  assert(torch.sum(diff) == 0, "Parameters aren't shared")
end

-- Returns size of outputs of this combine module
function FastHashLayer:getOutputSize()
  return self.emb_dim
end

function FastHashLayer:getParameters()
  return self.params, self.grad_params
end

-- zeros out the gradients
function FastHashLayer:zeroGradParameters()
  self.emb:zeroGradParameters()
end

function FastHashLayer:getModules()
  return {self.emb}
end

-- Shares parameters between this embed layer and other layer.
-- NOTE(review): intentionally left a no-op here (unlike BOWLayer:share,
-- which delegates to share_params) — confirm callers use share_params.
function FastHashLayer:share(other)
end

-- Gradient averaging over a batch
function FastHashLayer:normalizeGrads(batch_size)
  self.emb.gradWeight:div(batch_size)
end

-------------------------------------------------------------------------------- /dmn/input_module/input_layers/HashLayer.lua: --------------------------------------------------------------------------------
--[[

Hash layer: Trigram word hashing layer to create word embeddings

--]]

local HashLayer, parent = torch.class('dmn.HashLayer', 'dmn.InputLayer')

function HashLayer:__init(config)
  parent.__init(self, config)
  self.emb_table = nn.Linear(self.vocab_size, self.emb_dim)
  -- Copy embedding weights ('config' is a parameter here, so this is safe)
  if config.emb_vecs ~= nil then
    print("Initializing embeddings from config ")
    self.emb_table.weight:copy(config.emb_vecs)
  end

  self.emb = nn.Sequential()
    
:add(self.emb_table)

  if self.dropout then
    print("Adding dropout to embed layer")
    self.emb:add(nn.Dropout(self.dropout_prob, false))
  end


  if self.gpu_mode then
    self:set_gpu_mode()
  end
end

-- Returns all of the weights of this module
function HashLayer:getWeights()
  return self.params
end

-- Sets gpu mode
function HashLayer:set_gpu_mode()
  self.emb:cuda()
  self.gpu_mode = true
end

function HashLayer:set_cpu_mode()
  self.emb:double()
  self.gpu_mode = false
end

-- Enable Dropouts
function HashLayer:enable_dropouts()
  enable_sequential_dropouts(self.emb)
end

-- Disable Dropouts
function HashLayer:disable_dropouts()
  disable_sequential_dropouts(self.emb)
end

-- Forward step: projects hashed word vectors into the lower
-- dimensional latent semantic space via the linear layer.
function HashLayer:forward(hashed_rep)
  assert(hashed_rep ~= nil, "Must specify word tokens")
  self.word_proj = self.emb:forward(hashed_rep)
  return self.word_proj
end

-- Backward step: propagates err through the linear projection.
function HashLayer:backward(hashed_rep, err)
  -- BUGFIX: the original asserted on the undefined global 'word_tokens',
  -- which is always nil, so this assert fired on every call; check the
  -- actual argument instead.
  assert(hashed_rep ~= nil, "Must specify word tokens")
  assert(err ~= nil, "Must specify error with respect to gradient output")
  local emb_err = self.emb:backward(hashed_rep, err)
  return emb_err
end

-- Shares projection parameters with another HashLayer.
function HashLayer:share(other, ...)
  share_params(self.emb, other.emb, ...)
end

-- Returns size of outputs of this combine module
function HashLayer:getOutputSize()
  return self.emb_dim
end

function HashLayer:getParameters()
  return self.params, self.grad_params
end

-- zeros out the gradients
function HashLayer:zeroGradParameters()
  self.emb:zeroGradParameters()
end

function HashLayer:getModules()
  return {self.emb}
end

-- Shares parameters between this embed layer and other layer.
-- NOTE(review): this later definition overrides the parameter-sharing
-- HashLayer:share defined above, turning share() into a no-op — confirm
-- whether that is intentional before removing either definition.
function HashLayer:share(other)
end

-- Gradient averaging over a batch
function HashLayer:normalizeGrads(batch_size)
  self.emb.gradWeight:div(batch_size)
end

-------------------------------------------------------------------------------- /dmn/input_module/input_layers/InputLayer.lua: --------------------------------------------------------------------------------
--[[

Input Layer base class

--]]

local InputLayer = torch.class('dmn.InputLayer')

function InputLayer:__init(config)
  assert(config.emb_dim ~= nil, "Must specify embed dimensions")
  assert(config.num_classes ~= nil, "Must specify number of classes")
  assert(config.dropout_prob ~= nil, "Must specify dropout probability")
  assert(config.gpu_mode ~= nil, "Must specify gpu mode")
  assert(config.dropout ~= nil, "Must specify whether to use dropout or not")

  self.config = dmn.functions.deepcopy(config)
  self.gpu_mode = config.gpu_mode
  self.emb_dim = config.emb_dim
  self.emb_vecs = config.emb_vecs
  self.dropout = config.dropout
  self.vocab_size = config.num_classes
  self.dropout_prob = config.dropout_prob

  -- when pretrained vectors are given, their row count wins over num_classes
  if config.emb_vecs ~= nil then
    self.vocab_size = config.emb_vecs:size(1)
  end
end

-- Returns all of the weights of this module
function InputLayer:getWeights()
  error("Get weights not implemented!")
end

-- Sets gpu mode
function 
InputLayer:set_gpu_mode()
  error("Set gpu mode not implemented!")
end

function InputLayer:set_cpu_mode()
  error("Set cpu mode not implemented!")
end

-- Enable Dropouts
function InputLayer:enable_dropouts()
  error("Enable dropouts not implemented!")
end

-- Disable Dropouts
function InputLayer:disable_dropouts()
  error("Disable dropouts not implemented!")
end


-- Shared forward validation: checks the word indices match the tensor
-- type expected for the current gpu mode.
function InputLayer:forward(word_indices, gpu_mode)
  assert(word_indices ~= nil)
  local word_type = gpu_mode and 'torch.CudaTensor' or 'torch.IntTensor'
  check_type(word_indices, word_type)
end

-- Shared backward validation.
function InputLayer:backward(word_indices, err, gpu_mode)
  assert(word_indices ~= nil, "Word indices are null!")
  assert(err ~= nil, "Error is null!")

  local word_type = gpu_mode and 'torch.CudaTensor' or 'torch.IntTensor'
  check_type(word_indices, word_type)
end

-- Returns size of outputs of this combine module
function InputLayer:getOutputSize()
  error("Get output size not implemented!")
end

function InputLayer:getParameters()
  error("Get parameters not implemented!")
end

-- zeros out the gradients
function InputLayer:zeroGradParameters()
  error("Zero grad parameters not implemented!")
end

function InputLayer:getModules()
  error("Get modules not implemented!")
end

function InputLayer:share(other, ...)
  -- BUGFIX: the error message was copy-pasted from getModules and
  -- misreported which method was missing.
  error("Share not implemented!")
end

function InputLayer:normalizeGrads(batch_size)
  error("Normalize grads not implemented!")
end

-------------------------------------------------------------------------------- /dmn/input_module/input_layers/SparseHashLayer.lua: --------------------------------------------------------------------------------
--[[

Hash layer: Trigram word hashing layer to create word embeddings but in sparse representation
to save memory: represent words as indices in the word hashing layer and use a lookup table
plus sum layer

--]]

local SparseHashLayer, parent = torch.class('dmn.SparseHashLayer', 'dmn.InputLayer')

function SparseHashLayer:__init(config)
  parent.__init(self, config)

  -- create master embed cell
  self.emb = self:new_emb_table()

  -- create array of children cells
  self.emb_arr = {}

  -- transfer them to gpu mode
  if self.gpu_mode then
    self:set_gpu_mode()
  end

  -- get their parameters
end

-- Builds a LookupTable -> Sum(1) sequential (bag-of-characters per
-- word), optionally with dropout; shares weights with self.emb when the
-- master table already exists.
function SparseHashLayer:new_emb_table()
  local emb_table = nn.LookupTable(self.vocab_size, self.emb_dim)

  -- Copy embedding weights
  if self.emb_vecs ~= nil then
    print("Initializing embeddings from config ")
    -- BUGFIX: 'config' is not in scope in this method; the vectors are
    -- stored on self by InputLayer.__init.
    emb_table.weight:copy(self.emb_vecs)
  end

  local emb = nn.Sequential()
    :add(emb_table)
    :add(nn.Sum(1))

  if self.dropout then
    print("Adding dropout to embed layer")
    emb:add(nn.Dropout(self.dropout_prob, false))
  end

  -- share parameters if needed
  if self.gpu_mode then
    print("CUDA BABY")
    emb:cuda()
  end

  if self.emb ~= nil then
    emb:share(self.emb, 'weight', 'bias', 'gradWeight', 'gradBias')
  end
  return emb
end

-- Returns all of the weights of this module
function SparseHashLayer:getWeights()
  return self.params
end

-- Move the master table and every child encoder to the gpu.
function SparseHashLayer:set_gpu_mode()
  self.emb:cuda()
  for idx = 1, #self.emb_arr do
    self.emb_arr[idx]:cuda()
  end
  self.gpu_mode = true
end

-- Move the master table and every child encoder back to the cpu.
function SparseHashLayer:set_cpu_mode()
  self.emb:double()
  for idx = 1, #self.emb_arr do
    self.emb_arr[idx]:double()
  end
  self.gpu_mode = false
end

-- Turn dropout on for the master table and all child encoders.
function SparseHashLayer:enable_dropouts()
  enable_sequential_dropouts(self.emb)
  for idx = 1, #self.emb_arr do
    enable_sequential_dropouts(self.emb_arr[idx])
  end
end

-- Turn dropout off for the master table and all child encoders.
function SparseHashLayer:disable_dropouts()
  disable_sequential_dropouts(self.emb)
  for idx = 1, #self.emb_arr do
    disable_sequential_dropouts(self.emb_arr[idx])
  end
end


-- Forward step: hashed_rep is an array with one index tensor per word.
-- Each word goes through its own weight-shared encoder (created lazily)
-- and the summed embedding lands in row i of the result.
function SparseHashLayer:forward(hashed_rep)
  assert(hashed_rep ~= nil, "Must specify word tokens")

  -- get number of tokens in the sentence
  local sentence_length = #hashed_rep
  if self.gpu_mode then
    self.word_proj = torch.CudaTensor(sentence_length, self.emb_dim)
  else
    self.word_proj = torch.DoubleTensor(sentence_length, self.emb_dim)
  end

  for idx = 1, sentence_length do
    local cur_rep = hashed_rep[idx]
    if self.emb_arr[idx] == nil then
      print("Creating a new sparsehash word encoder for vector"..idx)
      self.emb_arr[idx] = self:new_emb_table()
    end
    self.word_proj[idx] = self.emb_arr[idx]:forward(cur_rep)
  end

  return self.word_proj
end

-- Backward step: runs each word's error through its encoder in reverse
-- order and collects the per-word input gradients.
function SparseHashLayer:backward(hashed_rep, err)
  assert(hashed_rep ~= nil, "Must specify word tokens")
  assert(err ~= nil, "Must specify error with respect to gradient output")

  -- get number of tokens in the sentence
  local sentence_length = #hashed_rep

  self.errors = {}

  for idx = sentence_length, 1, -1 do
    local cur_rep = hashed_rep[idx]
    if self.emb_arr[idx] == nil then
      print("Creating a word sentence encoder")
      self.emb_arr[idx] = self:new_emb_table()
    end

    self.errors[idx] = self.emb_arr[idx]:backward(cur_rep, err[idx])
  end

  return self.errors
end

-- Shares the master embedding with another SparseHashLayer and checks
-- both layers now produce identical outputs.
function SparseHashLayer:share_params(other, ...)
  assert(other ~= nil, "Must specify other SparseHashLayer to share params with")
  print("Sharing Sparse HashLayer")
  self.emb:share(other.emb, ...)

  local cur_type = self.gpu_mode and torch.CudaTensor or torch.IntTensor

  local input = {cur_type{1, 2}, cur_type{3, 4}}
  local test = self:forward(input)
  local test1 = other:forward(input)
  local diff = test - test1
  assert(torch.sum(diff) == 0, "Parameters aren't shared")
end

-- Size of the vectors this layer emits.
function SparseHashLayer:getOutputSize()
  return self.emb_dim
end

-- Parameters and their gradients.
function SparseHashLayer:getParameters()
  return self.params, self.grad_params
end

-- Clears accumulated gradients on the master table.
function SparseHashLayer:zeroGradParameters()
  self.emb:zeroGradParameters()
end

-- The nn modules owned by this layer.
function SparseHashLayer:getModules()
  return {self.emb}
end

-- Shares parameters between this embed layer and other layer.
-- NOTE(review): left as a no-op (share_params above does the real work)
-- — confirm callers use share_params directly.
function SparseHashLayer:share(other)
end

-- Gradient averaging over a batch.
function SparseHashLayer:normalizeGrads(batch_size)
  self.emb.gradWeight:div(batch_size)
end

-------------------------------------------------------------------------------- /dmn/models/RNN_Utils.lua: --------------------------------------------------------------------------------
--[[
Class to factor out common code from all of the lstms/rnns/grus etc. 
This was becoming repetitive 3 | ]] 4 | local rnn_utils = torch.class('dmn.rnn_utils') 5 | 6 | -- Initializes values from lstm 7 | function rnn_utils.init_values(lstm) 8 | assert(lstm ~= nil, "Must specify lstm to init values for") 9 | lstm.tensor_type = lstm.gpu_mode and torch.CudaTensor or torch.DoubleTensor 10 | lstm.depth = 0 11 | lstm.cells = {} -- table of cells in a roll-out 12 | lstm.tensors = {} -- table of tensors for faster lookup 13 | lstm.back_tensors = {} -- table of tensors for backprop 14 | 15 | -- initial (t = 0) states for forward propagation and initial error signals 16 | -- for backpropagation 17 | local ctable_init, ctable_grad, htable_init, htable_grad 18 | if lstm.num_layers == 1 then 19 | ctable_init = lstm.tensor_type(lstm.mem_dim):zero() 20 | htable_init = lstm.tensor_type(lstm.mem_dim):zero() 21 | ctable_grad = lstm.tensor_type(lstm.mem_dim):zero() 22 | htable_grad = lstm.tensor_type(lstm.mem_dim):zero() 23 | else 24 | ctable_init, ctable_grad, htable_init, htable_grad = {}, {}, {}, {} 25 | for i = 1, lstm.num_layers do 26 | ctable_init[i] = lstm.tensor_type(lstm.mem_dim):zero() 27 | htable_init[i] = lstm.tensor_type(lstm.mem_dim):zero() 28 | ctable_grad[i] = lstm.tensor_type(lstm.mem_dim):zero() 29 | htable_grad[i] = lstm.tensor_type(lstm.mem_dim):zero() 30 | end 31 | end 32 | 33 | lstm.dummy_values = {dmn.functions.deepcopy(ctable_init), dmn.functions.deepcopy(htable_init)} 34 | lstm.initial_values = {ctable_init, htable_init} 35 | lstm.gradInput = { 36 | lstm.tensor_type(lstm.in_dim):zero(), -- grad with respect to input 37 | ctable_grad, 38 | htable_grad, -- grad with respect to hidden state of lstm 39 | } 40 | 41 | 42 | -- precreate outputs for faster performance 43 | for i = 1, 100 do 44 | lstm.tensors[i] = lstm.tensor_type(i, lstm.mem_dim):zero() 45 | lstm.back_tensors[i] = lstm.tensor_type(i, lstm.in_dim):zero() 46 | end 47 | end 48 | 49 | -- Initializes values from lstm 50 | function rnn_utils.init_values_attention(lstm) 51 | 
assert(lstm ~= nil, "Must specify lstm to init values for") 52 | lstm.tensor_type = lstm.config.gpu_mode and torch.CudaTensor or torch.DoubleTensor 53 | lstm.depth = 0 54 | lstm.cells = {} -- table of cells in a roll-out 55 | lstm.tensors = {} -- table of tensors for faster lookup 56 | lstm.back_tensors = {} -- table of tensors for backprop 57 | 58 | -- initial (t = 0) states for forward propagation and initial error signals 59 | -- for backpropagation 60 | local ctable_init, ctable_grad, htable_init, htable_grad 61 | if lstm.num_layers == 1 then 62 | ctable_init = lstm.tensor_type(lstm.mem_dim):zero() 63 | htable_init = lstm.tensor_type(lstm.mem_dim):zero() 64 | ctable_grad = lstm.tensor_type(lstm.mem_dim):zero() 65 | htable_grad = lstm.tensor_type(lstm.mem_dim):zero() 66 | else 67 | ctable_init, ctable_grad, htable_init, htable_grad = {}, {}, {}, {} 68 | for i = 1, lstm.num_layers do 69 | ctable_init[i] = lstm.tensor_type(lstm.mem_dim):zero() 70 | htable_init[i] = lstm.tensor_type(lstm.mem_dim):zero() 71 | ctable_grad[i] = lstm.tensor_type(lstm.mem_dim):zero() 72 | htable_grad[i] = lstm.tensor_type(lstm.mem_dim):zero() 73 | end 74 | end 75 | 76 | lstm.dummy_values = {dmn.functions.deepcopy(ctable_init), dmn.functions.deepcopy(htable_init)} 77 | lstm.initial_values = {ctable_init, htable_init} 78 | lstm.gradInput = { 79 | lstm.tensor_type(lstm.in_dim):zero(), -- grad with respect to input 80 | lstm.tensor_type(lstm.in_dim):zero(), -- dummy 81 | ctable_grad, -- grad with respect to cell state of lstm 82 | htable_grad, -- grad with respect to hidden state of lstm 83 | } 84 | 85 | 86 | -- precreate outputs for faster performance 87 | for i = 1, 100 do 88 | lstm.tensors[i] = lstm.tensor_type(i, lstm.mem_dim):zero() 89 | lstm.back_tensors[i] = lstm.tensor_type(i, lstm.in_dim):zero() 90 | end 91 | end 92 | 93 | function rnn_utils.new_initial_values(lstm) 94 | assert(lstm ~= nil, "Must specify lstm to init values for") 95 | local ctable_init, htable_init 96 | if 
lstm.num_layers == 1 then 97 | ctable_init = lstm.tensor_type(lstm.mem_dim):zero() 98 | htable_init = lstm.tensor_type(lstm.mem_dim):zero() 99 | else 100 | htable_init, ctable_init = {}, {} 101 | for i = 1, lstm.num_layers do 102 | ctable_init[i] = lstm.tensor_type(lstm.mem_dim):zero() 103 | htable_init[i] = lstm.tensor_type(lstm.mem_dim):zero() 104 | end 105 | end 106 | 107 | return {ctable_init, htable_init} 108 | end 109 | 110 | -- Returns fresh zeroed initial hidden state for a GRU: a single mem_dim tensor when 110 | -- num_layers == 1, otherwise a table of num_layers mem_dim tensors. GRUs carry no cell state. 111 | function rnn_utils.new_initial_values_gru(lstm) 112 | assert(lstm ~= nil, "Must specify lstm to init values for") 113 | local htable_init -- fix: dropped unused `ctable_init` (was declared and left nil) 114 | if lstm.num_layers == 1 then 114 | htable_init = lstm.tensor_type(lstm.mem_dim):zero() 115 | else 116 | htable_init = {} -- fix: was `htable_init, ctable_init = {}`, which assigned nil to ctable_init 117 | for i = 1, lstm.num_layers do 118 | htable_init[i] = lstm.tensor_type(lstm.mem_dim):zero() 119 | end 120 | end 121 | 122 | return htable_init 123 | end -------------------------------------------------------------------------------- /dmn/models/nn_units/CRowAddTable.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Add a vector to every row of a matrix. 
 3 | Input: { [n x m], [m] } 4 | Output: [n x m] 5 | --]] 6 | 7 | local CRowAddTable, parent = torch.class('dmn.CRowAddTable', 'nn.Module') 8 | 9 | function CRowAddTable:__init() 10 | parent.__init(self) 11 | self.gradInput = {} 12 | end 13 | 14 | function CRowAddTable:updateOutput(input) 15 | self.output:resizeAs(input[1]):copy(input[1]) 16 | for i = 1, self.output:size(1) do 17 | self.output[i]:add(input[2]) 18 | end 19 | return self.output 20 | end 21 | 22 | function CRowAddTable:updateGradInput(input, gradOutput) 23 | self.gradInput[1] = self.gradInput[1] or input[1].new() 24 | self.gradInput[2] = self.gradInput[2] or input[2].new() 25 | self.gradInput[1]:resizeAs(input[1]) 26 | self.gradInput[2]:resizeAs(input[2]):zero() 27 | 28 | self.gradInput[1]:copy(gradOutput) 29 | for i = 1, gradOutput:size(1) do 30 | self.gradInput[2]:add(gradOutput[i]) 31 | end 32 | 33 | return self.gradInput 34 | end -------------------------------------------------------------------------------- /dmn/models/nn_units/CRowMulTable.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Multiply a matrix by a vector. 
3 | Input: { [n x m], [m] } 4 | Output: [n x m] -- NOTE(review): updateOutput computes input[1] * input[2] (matrix-times-vector), which yields an [n] vector, not [n x m]; header looks copied from CRowAddTable — confirm intended shapes 5 | --]] 6 | 7 | local CRowMulTable, parent = torch.class('dmn.CRowMulTable', 'nn.Module') 8 | 9 | -- Module output/gradInput buffers; gradInput is a table since input is a table of two tensors. 9 | function CRowMulTable:__init() 10 | parent.__init(self) 11 | self.gradInput = {} 12 | end 13 | 14 | -- Forward: Lua `*` on torch tensors; for an [n x m] matrix and [m] vector this is a mat-vec product. 14 | function CRowMulTable:updateOutput(input) 15 | self.output = input[1] * input[2] 16 | return self.output 17 | end 18 | 19 | function CRowMulTable:updateGradInput(input, gradOutput) 20 | self.gradInput[1] = self.gradInput[1] or input[1].new() 21 | self.gradInput[2] = self.gradInput[2] or input[2].new() 22 | self.gradInput[1]:resizeAs(input[1]) 23 | self.gradInput[2]:resizeAs(input[2]):zero() 24 | 25 | local grad_inputs = torch.sum(torch.cmul(gradOutput, input[1])) -- NOTE(review): torch.sum over the whole tensor returns a single number; this is the correct gradient only if input[2] acts as a *scalar* multiplier, which contradicts the [m]-vector header — confirm 26 | self.gradInput[1]:copy(gradOutput * input[2]) -- NOTE(review): for [n] gradOutput and [m] input[2] this is not the mat-vec gradient (outer product expected); consistent only with scalar input[2] — confirm 27 | self.gradInput[2]:copy(grad_inputs) -- NOTE(review): Tensor:copy expects a tensor, but grad_inputs is a number — likely a latent error; confirm against callers 28 | 29 | return self.gradInput 30 | end -------------------------------------------------------------------------------- /dmn/models/nn_units/JoinTable.lua: -------------------------------------------------------------------------------- 1 | local JoinTable, parent = torch.class('dmn.JoinTable', 'nn.Module') 2 | 3 | function JoinTable:__init() 4 | parent.__init(self) 5 | self.size = torch.LongStorage() 6 | self.gradInput = {} 7 | self.nInputDims = nInputDims -- NOTE(review): `nInputDims` is not a parameter of __init, so this reads a global (normally nil) 8 | end 9 | 10 | -- Stacks #input equal-length 1D tensors into a (#input x len) matrix. 10 | function JoinTable:updateOutput(input) 11 | local num_input = #input 12 | local input_size = input[1]:size(1) 13 | self.size = torch.LongStorage{num_input, input_size} 14 | 15 | local output = self.output:resize(self.size) 16 | for i = 1, #input do 17 | output[i]:copy(input[i]) 18 | end 19 | return output 20 | end 21 | 22 | function JoinTable:updateGradInput(input, gradOutput) 23 | local dimension = self.dimension -- NOTE(review): self.dimension is never set in this class; the branch below only fires if nInputDims was assigned externally 24 | if self.nInputDims and input[1]:dim()==(self.nInputDims+1) then 25 | dimension = dimension + 1 26 | end 27 | 28 | for i=1,#input do 29 | if self.gradInput[i] == nil then 30 | self.gradInput[i] = input[i].new() 31 | end 32 | self.gradInput[i]:resizeAs(input[i]) 33 | end 34 | 35 | -- clear 
out invalid gradInputs 36 | for i=#input+1, #self.gradInput do 37 | self.gradInput[i] = nil 38 | end 39 | 40 | for i=1,#input do 41 | local currentGradInput = gradOutput[i] 42 | self.gradInput[i]:copy(currentGradInput) 43 | end 44 | return self.gradInput 45 | end 46 | 47 | function JoinTable:type(type, tensorCache) 48 | self.gradInput = {} 49 | return parent.type(self, type, tensorCache) 50 | end 51 | -------------------------------------------------------------------------------- /dmn/models/nn_units/Linear.lua: -------------------------------------------------------------------------------- 1 | local Linear, parent = torch.class('dmn.Linear', 'nn.Module') 2 | 3 | function Linear:__init(inputSize, outputSize) 4 | parent.__init(self) 5 | 6 | dmn.logger:print("Initing linear table") 7 | self.weight = torch.Tensor(outputSize, inputSize) 8 | self.bias = torch.Tensor(outputSize) 9 | self.gradWeight = torch.Tensor(outputSize, inputSize) 10 | self.gradBias = torch.Tensor(outputSize) 11 | 12 | self:reset() 13 | end 14 | 15 | function Linear:reset(stdv) 16 | if stdv then 17 | stdv = stdv * math.sqrt(3) 18 | else 19 | stdv = 1./math.sqrt(self.weight:size(2)) 20 | end 21 | if nn.oldSeed then 22 | for i=1,self.weight:size(1) do 23 | self.weight:select(1, i):apply(function() 24 | return torch.uniform(-stdv, stdv) 25 | end) 26 | self.bias[i] = torch.uniform(-stdv, stdv) 27 | end 28 | else 29 | self.weight:uniform(-stdv, stdv) 30 | self.bias:uniform(-stdv, stdv) 31 | end 32 | 33 | return self 34 | end 35 | 36 | function Linear:updateOutput(input) 37 | if input:dim() == 1 then 38 | self.output:resize(self.bias:size(1)) 39 | self.output:copy(self.bias) 40 | self.output:addmv(1, self.weight, input) 41 | elseif input:dim() == 2 then 42 | local nframe = input:size(1) 43 | local nunit = self.bias:size(1) 44 | self.output:resize(nframe, nunit) 45 | if not self.addBuffer or self.addBuffer:nElement() ~= nframe then 46 | self.addBuffer = input.new(nframe):fill(1) 47 | end 48 | if nunit == 
1 then 49 | -- Special case to fix output size of 1 bug: 50 | self.output:copy(self.bias:view(1,nunit):expand(#self.output)) 51 | self.output:select(2,1):addmv(1, input, self.weight:select(1,1)) 52 | else 53 | self.output:addmm(0, self.output, 1, input, self.weight:t()) 54 | self.output:addr(1, self.addBuffer, self.bias) -- fix: add the bias via outer product with the ones buffer; previous code repeated the addmm (doubling input*W^T) and never added the bias 55 | end 56 | else 57 | error('input must be vector or matrix') 58 | end 59 | 60 | return self.output 61 | end 62 | 63 | -- Gradient w.r.t. input: g * W (batch) or W^T * g (single sample); buffer is zero-filled on resize. 63 | function Linear:updateGradInput(input, gradOutput) 64 | if self.gradInput then 65 | 66 | local nElement = self.gradInput:nElement() 67 | self.gradInput:resizeAs(input) 68 | if self.gradInput:nElement() ~= nElement then 69 | self.gradInput:zero() 70 | end 71 | if input:dim() == 1 then 72 | self.gradInput:addmv(0, 1, self.weight:t(), gradOutput) 73 | elseif input:dim() == 2 then 74 | self.gradInput:addmm(0, 1, gradOutput, self.weight) 75 | end 76 | 77 | return self.gradInput 78 | end 79 | end 80 | 81 | -- Accumulates weight/bias gradients, scaled by `scale` (defaults to 1). 81 | function Linear:accGradParameters(input, gradOutput, scale) 82 | scale = scale or 1 83 | 84 | if input:dim() == 1 then 85 | self.gradWeight:addr(scale, gradOutput, input) 86 | self.gradBias:add(scale, gradOutput) 87 | elseif input:dim() == 2 then 88 | local nunit = self.bias:size(1) 89 | 90 | if nunit == 1 then 91 | -- Special case to fix output size of 1 bug: 92 | self.gradWeight:select(1,1):addmv(scale, input:t(), gradOutput:select(2,1)) 93 | self.gradBias:addmv(scale, gradOutput:t(), self.addBuffer) 94 | else 95 | self.gradWeight:addmm(scale, gradOutput:t(), input) 96 | self.gradBias:addmv(scale, gradOutput:t(), self.addBuffer) 97 | end 98 | end 99 | 100 | end 101 | 102 | -- we do not need to accumulate parameters when sharing 103 | Linear.sharedAccUpdateGradParameters = Linear.accUpdateGradParameters 104 | 105 | 106 | function Linear:__tostring__() 107 | return torch.type(self) .. 
108 | string.format('(%d -> %d)', self.weight:size(2), self.weight:size(1)) 109 | end 110 | -------------------------------------------------------------------------------- /dmn/models/nn_units/PaddedJoinTable.lua: -------------------------------------------------------------------------------- 1 | local JoinTable, parent = torch.class('dmn.PaddedJoinTable', 'nn.Module') 2 | 3 | function JoinTable:__init() 4 | parent.__init(self) 5 | self.size = torch.LongStorage() 6 | self.gradInput = {} 7 | self.nInputDims = nInputDims 8 | end 9 | 10 | function JoinTable:updateOutput(input) 11 | local num_input = #input 12 | local input_size = input[1]:size(1) 13 | self.size = torch.LongStorage{num_input, input_size} 14 | 15 | local output = self.output:resize(self.size) 16 | for i = 1, #input do 17 | output[i]:copy(input[i]) 18 | end 19 | return output 20 | end 21 | 22 | function JoinTable:updateGradInput(input, gradOutput) 23 | local dimension = self.dimension 24 | if self.nInputDims and input[1]:dim()==(self.nInputDims+1) then 25 | dimension = dimension + 1 26 | end 27 | 28 | for i=1,#input do 29 | if self.gradInput[i] == nil then 30 | self.gradInput[i] = input[i].new() 31 | end 32 | self.gradInput[i]:resizeAs(input[i]) 33 | end 34 | 35 | -- clear out invalid gradInputs 36 | for i=#input+1, #self.gradInput do 37 | self.gradInput[i] = nil 38 | end 39 | 40 | for i=1,#input do 41 | local currentGradInput = gradOutput[i] 42 | self.gradInput[i]:copy(currentGradInput) 43 | end 44 | return self.gradInput 45 | end 46 | 47 | function JoinTable:type(type, tensorCache) 48 | self.gradInput = {} 49 | return parent.type(self, type, tensorCache) 50 | end 51 | -------------------------------------------------------------------------------- /dmn/models/nn_units/Padding.lua: -------------------------------------------------------------------------------- 1 | local Padding, parent = torch.class('dmn.Padding', 'nn.Module') 2 | 3 | -- Dynamically pads input by specified value: Makes sure padding 
size will 4 | -- add up to specified amount. 5 | -- pad can be positive (right) negative (left) 6 | function Padding:__init(dim, pad, nInputDim, value) 7 | self.dim = dim 8 | self.pad = pad 9 | self.nInputDim = nInputDim 10 | self.value = value or 0 11 | self.outputSize = torch.LongStorage() 12 | parent.__init(self) 13 | end 14 | 15 | 16 | function Padding:updateOutput(input) 17 | self.outputSize:resize(input:dim()) 18 | self.outputSize:copy(input:size()) 19 | local dim = self.dim 20 | if self.nInputDim and input:dim() ~= self.nInputDim then 21 | dim = dim + 1 22 | end 23 | self.outputSize[dim] = self.outputSize[dim] + math.abs(self.pad) 24 | self.output:resize(self.outputSize) 25 | self.output:fill(self.value) 26 | local outputWindow 27 | if self.pad > 0 then 28 | outputWindow = self.output:narrow(dim, 1, input:size(dim)) 29 | else 30 | outputWindow = self.output:narrow(dim, 1 - self.pad, input:size(dim)) 31 | end 32 | outputWindow:copy(input) 33 | return self.output 34 | end 35 | 36 | function Padding:updateGradInput(input, gradOutput) 37 | self.gradInput:resizeAs(input) 38 | local dim = self.dim 39 | if self.nInputDim and input:dim() ~= self.nInputDim then 40 | dim = dim + 1 41 | end 42 | local gradOutputWindow 43 | if self.pad > 0 then 44 | gradOutputWindow = gradOutput:narrow(dim, 1, input:size(dim)) 45 | else 46 | gradOutputWindow = gradOutput:narrow(dim, 1 - self.pad, input:size(dim)) 47 | end 48 | self.gradInput:copy(gradOutputWindow) 49 | return self.gradInput 50 | end -------------------------------------------------------------------------------- /dmn/models/nn_units/SmoothCosineSimilarity.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Input: a table of two inputs {M, k}, where 4 | M = an n-by-m matrix 5 | k = an m-dimensional vector 6 | 7 | Output: an n-dimensional vector 8 | 9 | Each element is an approximation of the cosine similarity between k and the 10 | corresponding row of M. 
It's an approximation since we add a constant to the 11 | denominator of the cosine similarity function to remove the singularity when 12 | one of the inputs is zero. 13 | 14 | --]] 15 | 16 | local SmoothCosineSimilarity, parent = torch.class('dmn.SmoothCosineSimilarity', 'nn.Module') 17 | 18 | function SmoothCosineSimilarity:__init(smoothen) 19 | parent.__init(self) 20 | self.gradInput = {} 21 | self.smooth = smoothen or 1e-3 22 | end 23 | 24 | function SmoothCosineSimilarity:updateOutput(input) 25 | local M, k = unpack(input) 26 | self.rownorms = torch.cmul(M, M):sum(2):sqrt():view(M:size(1)) 27 | self.knorm = math.sqrt(k:dot(k)) 28 | self.dot = M * k 29 | self.output:set(torch.cdiv(self.dot, self.rownorms * self.knorm + self.smooth)) 30 | return self.output 31 | end 32 | 33 | function SmoothCosineSimilarity:updateGradInput(input, gradOutput) 34 | local M, k = unpack(input) 35 | self.gradInput[1] = self.gradInput[1] or input[1].new() 36 | self.gradInput[2] = self.gradInput[2] or input[2].new() 37 | 38 | -- M gradient 39 | local rows = M:size(1) 40 | local Mgrad = self.gradInput[1] 41 | Mgrad:set(k:repeatTensor(rows, 1)) 42 | for i = 1, rows do 43 | if self.rownorms[i] > 0 then 44 | Mgrad[i]:add(-self.output[i] * self.knorm / self.rownorms[i], M[i]) 45 | end 46 | Mgrad[i]:mul(gradOutput[i] / (self.rownorms[i] * self.knorm + self.smooth)) 47 | end 48 | 49 | -- k gradient 50 | self.gradInput[2]:set(M:t() * torch.cdiv(gradOutput, self.rownorms * self.knorm + self.smooth)) 51 | if self.knorm > 0 then 52 | local scale = torch.cmul(self.output, self.rownorms) 53 | :cdiv(self.rownorms * self.knorm + self.smooth) 54 | :dot(gradOutput) / self.knorm 55 | self.gradInput[2]:add(-scale, k) 56 | end 57 | return self.gradInput 58 | end 59 | -------------------------------------------------------------------------------- /dmn/models/nn_units/SpatialCrossLRN.lua: -------------------------------------------------------------------------------- 1 | local SpatialCrossLRN, parent = 
torch.class('dmn.SpatialCrossLRN', 'nn.Module') 2 | 3 | function SpatialCrossLRN:__init(size, alpha, beta, k) 4 | parent.__init(self) 5 | 6 | self.size = size 7 | self.alpha = alpha or 0.0001 8 | self.beta = beta or 0.75 9 | self.k = k or 1 10 | 11 | self.scale = torch.Tensor() 12 | 13 | end 14 | 15 | function SpatialCrossLRN:cuda() 16 | dmn.logger:print("Cuda called on this guy") 17 | end 18 | 19 | function SpatialCrossLRN:updateOutput(input) 20 | assert(input:dim() == 3 or input:dim() == 4, 21 | 'Input must be 3D or 4D') 22 | local isBatch = true 23 | if input:dim() == 3 then 24 | input = nn.utils.addSingletonDimension(input) 25 | isBatch = false 26 | end 27 | 28 | local batchSize = input:size(1) 29 | local channels = input:size(2) 30 | local inputHeight = input:size(3) 31 | local inputWidth = input:size(4) 32 | 33 | self.output:resizeAs(input) 34 | self.scale:resizeAs(input) 35 | 36 | -- use output storage as temporary buffer 37 | local inputSquare = self.output 38 | inputSquare:pow(input, 2) 39 | 40 | local prePad = (self.size - 1)/2 + 1 41 | local prePadCrop = prePad > channels and channels or prePad 42 | 43 | local scaleFirst = self.scale:select(2,1) 44 | scaleFirst:zero() 45 | -- compute first feature map normalization 46 | for c = 1, prePadCrop do 47 | scaleFirst:add(inputSquare:select(2, c)) 48 | end 49 | 50 | -- reuse computations for next feature maps normalization 51 | -- by adding the next feature map and removing the previous 52 | for c = 2, channels do 53 | local scalePrevious = self.scale:select(2, c -1) 54 | local scaleCurrent = self.scale:select(2, c) 55 | scaleCurrent:copy(scalePrevious) 56 | if c < channels - prePad + 2 then 57 | local squareNext = inputSquare:select(2, c + prePad - 1) 58 | scaleCurrent:add(1, squareNext) 59 | end 60 | if c > prePad then 61 | local squarePrevious = inputSquare:select(2, c - prePad ) 62 | scaleCurrent:add(-1, squarePrevious) 63 | end 64 | end 65 | 66 | self.scale:mul(self.alpha/self.size):add(self.k) 67 | 68 | 
self.output:pow(self.scale,-self.beta) 69 | self.output:cmul(input) 70 | 71 | if not isBatch then 72 | self.output = self.output[1] 73 | end 74 | 75 | return self.output 76 | end 77 | 78 | function SpatialCrossLRN:updateGradInput(input, gradOutput) 79 | assert(input:dim() == 3 or input:dim() == 4, 80 | 'Input must be 3D or 4D') 81 | local isBatch = true 82 | if input:dim() == 3 then 83 | input = nn.utils.addSingletonDimension(input) 84 | gradOutput = nn.utils.addSingletonDimension(gradOutput) 85 | self.output = nn.utils.addSingletonDimension(self.output) 86 | isBatch = false 87 | end 88 | 89 | local batchSize = input:size(1) 90 | local channels = input:size(2) 91 | local inputHeight = input:size(3) 92 | local inputWidth = input:size(4) 93 | 94 | self.paddedRatio = self.paddedRatio or input.new() 95 | self.accumRatio = self.accumRatio or input.new() 96 | self.paddedRatio:resize(channels + self.size - 1, inputHeight, inputWidth) 97 | self.accumRatio:resize(inputHeight,inputWidth) 98 | 99 | local cacheRatioValue = 2*self.alpha*self.beta/self.size 100 | local inversePrePad = self.size - (self.size - 1) / 2 101 | 102 | self.gradInput:resizeAs(input) 103 | self.gradInput:pow(self.scale,-self.beta):cmul(gradOutput) 104 | 105 | self.paddedRatio:zero() 106 | local paddedRatioCenter = self.paddedRatio:narrow(1, inversePrePad, channels) 107 | for n = 1, batchSize do 108 | paddedRatioCenter:cmul(gradOutput[n],self.output[n]) 109 | paddedRatioCenter:cdiv(self.scale[n]) 110 | self.accumRatio:sum(self.paddedRatio:narrow(1,1,self.size-1), 1) 111 | for c = 1, channels do 112 | self.accumRatio:add(self.paddedRatio[c+self.size-1]) 113 | self.gradInput[n][c]:addcmul(-cacheRatioValue, input[n][c], self.accumRatio) 114 | self.accumRatio:add(-1, self.paddedRatio[c]) 115 | end 116 | end 117 | 118 | if not isBatch then 119 | self.gradInput = self.gradInput[1] 120 | self.output = self.output[1] 121 | end 122 | 123 | return self.gradInput 124 | end 
-------------------------------------------------------------------------------- /dmn/models/nn_units/Squeeze.lua: -------------------------------------------------------------------------------- 1 | local Squeeze, parent = torch.class('dmn.Squeeze', 'nn.Module') 2 | 3 | function Squeeze:__init() 4 | parent.__init(self) 5 | self.gradInput = {} 6 | end 7 | 8 | 9 | function Squeeze:updateOutput(input) 10 | self.size = input:size() 11 | self.output = input:squeeze() 12 | return self.output 13 | end 14 | 15 | function Squeeze:cuda() 16 | dmn.logger:print("Called cuda on squeeze unit") 17 | end 18 | 19 | function Squeeze:updateGradInput(input, gradOutput) 20 | self.gradInput = gradOutput:view(self.size) 21 | return self.gradInput 22 | end -------------------------------------------------------------------------------- /dmn/question_module/QuestionModule.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | A QuestionModule takes two things as input: 4 | 1) a lookup-table for word embeddings 5 | 1) an LSTM cell network for encoding 6 | 7 | It first encodes word indices to embeddings and then 8 | It encodes the word embeddings into the memory states of the lstm. 
9 | Returns final memory state of lstm at the end 10 | 11 | --]] 12 | 13 | local QuestionModule = torch.class('dmn.QuestionModule') 14 | 15 | function QuestionModule:__init(config) 16 | -- parameters for lstm cell 17 | assert(config.gpu_mode ~= nil, "Must specify gpu mode") 18 | assert(config.emb_dim ~= nil, "Must specify embed dimensions") 19 | assert(config.num_classes ~= nil, "Must specify number of classes") 20 | assert(config.dropout_prob ~= nil, "Must specify dropout probability") 21 | assert(config.dropout ~= nil, "Must specify dropout") 22 | assert(config.gpu_mode ~= nil, "Must specify gpu mode") 23 | assert(config.mem_dim ~= nil, "Must specify memory dimension of lstm") 24 | assert(config.num_layers ~= nil, "Must specify number of layers to lstm") 25 | assert(config.hashing ~= nil, "Must specify whether to hash word tokens or not") 26 | 27 | self.config = config 28 | self.hashing = config.hashing 29 | self.gpu_mode = config.gpu_mode 30 | self.mem_dim = config.mem_dim 31 | self.num_layers = config.num_layers 32 | self.reverse = false; 33 | 34 | local embed_type = self.hashing and dmn.SparseHashLayer or dmn.EmbedLayer 35 | self.embed_layer = embed_type{ 36 | gpu_mode = config.gpu_mode, 37 | emb_dim = config.emb_dim, 38 | num_classes = config.num_classes, 39 | dropout_prob = config.dropout_prob, 40 | gpu_mode = config.gpu_mode, 41 | dropout = config.dropout 42 | } 43 | self.lstm_layer = dmn.LSTM_Encoder{ 44 | in_dim = config.emb_dim, 45 | mem_dim = config.mem_dim, 46 | num_layers = config.num_layers, 47 | gpu_mode = config.gpu_mode 48 | } 49 | 50 | self.hidden_inputs = new_hidden_activations_lstm(self.gpu_mode, self.mem_dim, self.num_layers) 51 | 52 | self.tot_modules = {} 53 | insert_modules_to_table(self.tot_modules, self.lstm_layer:getModules()) 54 | insert_modules_to_table(self.tot_modules, self.embed_layer:getModules()) 55 | 56 | self.modules = nn.Parallel() 57 | add_modules(self.modules, self.lstm_layer:getModules()) 58 | add_modules(self.modules, 
self.embed_layer:getModules()) 59 | 60 | if self.gpu_mode then 61 | self:set_gpu_mode() 62 | end 63 | 64 | print("Modules we're optimizing for question module") 65 | print(self.modules) 66 | end 67 | 68 | -- Enable Dropouts 69 | function QuestionModule:enable_dropouts() 70 | self.embed_layer:enable_dropouts() 71 | end 72 | 73 | -- Disable Dropouts 74 | function QuestionModule:disable_dropouts() 75 | self.embed_layer:disable_dropouts() 76 | end 77 | 78 | 79 | 80 | -- Resets depth to 1 81 | function QuestionModule:reset_depth() 82 | self.lstm_layer:forget() 83 | end 84 | 85 | 86 | function QuestionModule:zeroGradParameters() 87 | self.grad_params:zero() 88 | self.lstm_layer:zeroGradParameters() 89 | end 90 | 91 | -- Forward propagate. 92 | -- inputs: T x in_dim tensor, where T is the number of time steps. 93 | -- states: hidden, cell states of LSTM if true, read the input from right to left (useful for bidirectional LSTMs). 94 | -- Returns lstm output, class predictions, and error if train, else not error 95 | function QuestionModule:forward(inputs) 96 | assert(inputs ~= nil, "Must specify inputs to forward") 97 | self.word_embeds = self.embed_layer:forward(inputs) 98 | local lstm_output = self.lstm_layer:forward(self.word_embeds, self.hidden_inputs, self.reverse) 99 | return lstm_output 100 | end 101 | 102 | 103 | -- Backpropagate: forward() must have been called previously on the same input. 104 | -- inputs: T x in_dim tensor, where T is the number of time steps. 105 | -- hidden_inputs: {hidden_dim, hidden_tim} tensors 106 | -- reverse: True if reverse input, false otherwise 107 | -- errors: T x num_layers x num_hidden tensor 108 | -- class_predictions: T x 1 tensor of predictions 109 | -- labels: actual labels 110 | -- Returns the gradients with respect to the inputs (in the same order as the inputs). 
111 | function QuestionModule:backward(inputs, errors) 112 | assert(inputs ~= nil, "Must put in gru regular inputs") 113 | assert(errors ~= nil, "must put in lstm outputs") 114 | local lstm_input_derivs, hidden_derivs = 115 | self.lstm_layer:backward(self.word_embeds, self.hidden_inputs, self.reverse, errors) 116 | local emb_errors = self.embed_layer:backward(inputs, lstm_input_derivs) 117 | return lstm_input_derivs, hidden_derivs 118 | end 119 | 120 | function QuestionModule:grad_check() 121 | self.params, self.grad_params = self.modules:getParameters() 122 | local input_indices = torch.IntTensor{1, 2, 3, 2, 3, 4, 2, 3, 4, 2, 3, 4} 123 | local criterion = nn.MSECriterion() 124 | local desired_state = torch.rand(self.lstm_layer.mem_dim) 125 | 126 | local currIndex = 0 127 | local feval = function(x) 128 | self.grad_params:zero() 129 | local lstm_output = self:forward(input_indices) 130 | local loss = criterion:forward(lstm_output, desired_state) 131 | local errors = criterion:backward(lstm_output, desired_state) 132 | self:backward(input_indices, errors) 133 | currIndex = currIndex + 1 134 | print(currIndex, " of ", self.params:size(1)) 135 | return loss, self.grad_params 136 | end 137 | -- check gradients for lstm layer 138 | diff, DC, DC_est = optim.checkgrad(feval, self.params, 1e-7) 139 | print("Gradient error for question module network is") 140 | print(diff) 141 | assert(diff < 1e-5, "Gradient is greater than tolerance") 142 | end 143 | 144 | -- Sets all networks to gpu mode 145 | function QuestionModule:set_gpu_mode() 146 | self.lstm_layer:set_gpu_mode() 147 | self.embed_layer:set_gpu_mode() 148 | end 149 | 150 | -- Sets all networks to cpu mode 151 | function QuestionModule:set_cpu_mode() 152 | self.lstm_layer:set_cpu_mode() 153 | self.embed_layer:set_cpu_mode() 154 | end 155 | 156 | function QuestionModule:getModules() 157 | return self.tot_modules 158 | end 159 | 160 | function QuestionModule:getParameters() 161 | return self.params, self.grad_params 162 
| end 163 | 164 | function QuestionModule:getWeights() 165 | return self.params 166 | end 167 | 168 | -- Resets depths for lstm 169 | function QuestionModule:forget() 170 | self.lstm_layer:forget() 171 | end 172 | 173 | function QuestionModule:print_config() 174 | printf('%-25s = %d\n', 'embed dimension', self.config.emb_dim) 175 | printf('%-25s = %d\n', 'input dimension', self.config.in_dim) 176 | printf('%-25s = %s\n', 'use dropout', self.config.dropout) 177 | printf('%-25s = %f\n', 'dropout probability', self.config.dropout_prob) 178 | printf('%-25s = %d\n', 'number of classes', self.config.num_classes) 179 | printf('%-25s = %d\n', 'memory dimension', self.config.mem_dim) 180 | printf('%-25s = %d\n', 'number of layers', self.config.num_layers) 181 | end 182 | 183 | 184 | 185 | -------------------------------------------------------------------------------- /dmn/scripts/create_vocab.py: -------------------------------------------------------------------------------- 1 | """ 2 | Preprocessing script for MT data. 
3 | 4 | """ 5 | import re 6 | import json 7 | import os 8 | import glob 9 | import time 10 | 11 | def make_dirs(dirs): 12 | for d in dirs: 13 | if not os.path.exists(d): 14 | os.makedirs(d) 15 | 16 | 17 | def build_vocab(dataset_paths, dst_path, word_count_threshold = 5): 18 | 19 | # count up all word counts so that we can threshold 20 | # this shouldnt be too expensive of an operation 21 | print ('preprocessing word counts and creating vocab based on word count threshold %d' % (word_count_threshold, )) 22 | t0 = time.time() 23 | word_counts = {} 24 | nsents = 0 25 | 26 | for dataset_path in dataset_paths: 27 | dataset = open(dataset_path, 'r') 28 | line = 'asdf' 29 | while True: 30 | line = dataset.readline() 31 | if line == '': break 32 | # Remove newline characters 33 | trimmedLine = line.replace('\n', '').replace('\r', '').replace('\r\n', '').replace('(', '( ') 34 | # add space between question marks and periods 35 | paddedLine = trimmedLine.replace('?', ' ?').replace('.', ' .').replace(')', ' )').replace('-', ' - ') 36 | paddedLine = paddedLine.replace(',', ' , ').replace('"', ' " ') 37 | 38 | tokens = paddedLine.split(' ') 39 | for w in tokens: 40 | word_counts[w] = word_counts.get(w, 0) + 1 41 | 42 | vocab = [w for w in word_counts if word_counts[w] >= word_count_threshold] 43 | print ('filtered words from %d to %d in %.2fs' % (len(word_counts), len(vocab), time.time() - t0)) 44 | 45 | # with K distinct words: 46 | # - there are K+1 possible inputs (START token and all the words) 47 | # - there are K+1 possible outputs (END token and all the words) 48 | # we use ixtoword to take predicted indeces and map them to words for output visualization 49 | # we use wordtoix to take raw words and get their index in word vector matrix 50 | ixtoword = {} 51 | ixtoword[0] = '.' # period at the end of the sentence. 
make first dimension be end token 52 | wordtoix = {} 53 | wordtoix['#START#'] = 0 # make first vector be the start token 54 | ix = 1 55 | for w in vocab: 56 | wordtoix[w] = ix 57 | ixtoword[ix] = w 58 | ix += 1 59 | 60 | with open(dst_path, 'w') as f: 61 | for i in ixtoword: 62 | w = ixtoword[i] 63 | f.write(w + '\n') 64 | 65 | print('saved vocabulary to %s' % dst_path) 66 | 67 | if __name__ == '__main__': 68 | print('=' * 80) 69 | print('Preprocessing QA dataset') 70 | print('=' * 80) 71 | 72 | base_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 73 | data_dir = os.path.join(base_dir, 'data') 74 | QA_dir = os.path.join(data_dir, 'Translation/train') 75 | 76 | input_paths = [os.path.join(QA_dir, 'inputs.txt')] 77 | question_paths = [os.path.join(QA_dir, 'questions.txt')] 78 | output_paths = [os.path.join(QA_dir, 'outputs.txt')] 79 | 80 | input_save_path = os.path.join(QA_dir, 'input_vocab.txt') 81 | question_save_path = os.path.join(QA_dir, 'question_vocab.txt') 82 | output_save_path = os.path.join(QA_dir, 'output_vocab.txt') 83 | 84 | # get vocabulary 85 | token_paths = [input_paths, question_paths, output_paths] 86 | save_paths = [input_save_path, question_save_path, output_save_path] 87 | 88 | for i in range(0, len(token_paths)): 89 | build_vocab( 90 | token_paths[i], 91 | save_paths[i], 92 | 1 93 | ) 94 | -------------------------------------------------------------------------------- /dmn/scripts/setup.sh: -------------------------------------------------------------------------------- 1 | python create_vocab.py 2 | 3 | dmn.cloud_uploader:download('trained_models/Attention_Network_char_level_true_attention_type_fine_14.th', 'Attention_Network_char_level_true_attention_type_fine_14.th', 'softmax_models') 4 | dmn.cloud_uploader:download('trained_models/856_Captioner_Network_9.th', '856_Captioner_Network_9.th', 'softmax_models') 5 | -------------------------------------------------------------------------------- /dmn/scripts/untitled: 
--[[

A WordEmbedModule takes two things as input:
1) a lookup-table for word embeddings

It encodes word indices to embeddings which could either be hash
indices or hash + LSTM indices

--]]

local WordEmbedModule = torch.class('dmn.WordEmbedModule')

-- Constructs the module.
-- config fields (all required):
--   gpu_mode     : boolean, run on GPU
--   emb_dim      : embedding dimensionality
--   num_classes  : vocabulary size
--   dropout_prob : input dropout probability
--   hashing      : boolean, use FastHashLayer instead of EmbedLayer
--   dropout      : boolean, enable dropout layers
function WordEmbedModule:__init(config)
  -- parameters for lstm cell
  assert(config.gpu_mode ~= nil, "Must specify gpu mode")
  assert(config.emb_dim ~= nil, "Must specify embed dimensions")
  assert(config.num_classes ~= nil, "Must specify number of classes")
  assert(config.dropout_prob ~= nil, "Must specify dropout probability")
  assert(config.hashing ~= nil, "Must specify whether to hash or not")
  assert(config.dropout ~= nil, "Must specify whether to use dropout or not")

  self.config = config
  self.hashing = config.hashing
  self.gpu_mode = config.gpu_mode
  self.reverse = false;

  -- choose hashing-based or plain lookup-table embedding
  local embed_type = self.hashing and dmn.FastHashLayer or dmn.EmbedLayer

  self.embed_layer = embed_type{
    gpu_mode = config.gpu_mode,
    emb_dim = config.emb_dim,
    num_classes = config.num_classes,
    dropout_prob = config.dropout_prob,
    dropout = config.dropout
  }

  -- container used only for parameter flattening (see grad_check)
  self.modules = nn.Parallel()
  add_modules(self.modules, self.embed_layer:getModules())

  if self.gpu_mode then
    dmn.logger:print("Setting word embed module to gpu mode")
    self:set_gpu_mode()
  end

  self.tot_modules = {}
  insert_modules_to_table(self.tot_modules, self.embed_layer:getModules())

  -- NOTE: params/grad_params are fetched lazily (grad_check) because the
  -- flattening call below was deliberately left disabled.
  --self.params, self.grad_params = self.modules:getParameters()

  dmn.logger:print("Modules we're optimizing for word embed module")
  dmn.logger:print(self.modules)
end

-- Shares embedding parameters with another WordEmbedModule.
function WordEmbedModule:share(other, ...)
  assert(other ~= nil, "Must specify other embed layer to share")
  dmn.logger:print("Sharing word embed module")
  self.embed_layer:share_params(other.embed_layer, ...)
end

-- Enable Dropouts
function WordEmbedModule:enable_dropouts()
  self.embed_layer:enable_dropouts()
end

-- Disable Dropouts
function WordEmbedModule:disable_dropouts()
  self.embed_layer:disable_dropouts()
end

-- Resets depth to 1 (no-op: the embed layer keeps no recurrent depth)
function WordEmbedModule:reset_depth()
end

function WordEmbedModule:zeroGradParameters()
  -- self.grad_params is only populated once getParameters has been called
  -- (e.g. by grad_check); guard against the uninitialized case instead of
  -- dereferencing nil.
  if self.grad_params ~= nil then
    self.grad_params:zero()
  end
  self.embed_layer:zeroGradParameters()
end

-- Forward propagate.
-- inputs: T x in_dim tensor, where T is the number of time steps.
-- Returns the word embeddings for the given indices.
function WordEmbedModule:forward(inputs)
  assert(inputs ~= nil, "Must specify inputs to forward for word embed module")
  self.word_embeds = self.embed_layer:forward(inputs)
  return self.word_embeds
end

-- Backpropagate: forward() must have been called previously on the same input.
-- inputs: T x in_dim tensor, where T is the number of time steps.
-- errors: gradient of the loss w.r.t. the embeddings produced by forward().
-- Returns the gradients with respect to the inputs.
function WordEmbedModule:backward(inputs, errors)
  assert(inputs ~= nil, "Must put in gru regular inputs")
  assert(errors ~= nil, "must put in lstm outputs")
  local emb_errors = self.embed_layer:backward(inputs, errors)
  return emb_errors
end

-- Numerically checks the gradient of the embedding layer against a random
-- MSE target.
function WordEmbedModule:grad_check()
  self.params, self.grad_params = self.modules:getParameters()
  local input_indices = torch.IntTensor{1, 2, 3}
  local criterion = nn.MSECriterion()
  -- BUG FIX: this module has no lstm_layer; the target must match the
  -- embedding output size (emb_dim), not a (nonexistent) LSTM memory dim.
  local desired_state = torch.rand(3, self.config.emb_dim)

  local currIndex = 0
  local feval = function(x)
    self.grad_params:zero()
    local lstm_output = self:forward(input_indices)
    local loss = criterion:forward(lstm_output, desired_state)
    local errors = criterion:backward(lstm_output, desired_state)
    self:backward(input_indices, errors)
    currIndex = currIndex + 1
    print(currIndex, " of ", self.params:size(1))
    return loss, self.grad_params
  end
  -- check gradients for lstm layer
  diff, DC, DC_est = optim.checkgrad(feval, self.params, 1e-7)
  print("Gradient error for word embed module network is")
  print(diff)
  assert(diff < 1e-5, "Gradient is greater than tolerance")
end

-- Sets all networks to gpu mode
function WordEmbedModule:set_gpu_mode()
  dmn.logger:print("Setting word embed module to gpu mode")
  self.gpu_mode = true
  self.config.gpu_mode = true
  self.embed_layer.gpu_mode = true
  self.embed_layer:set_gpu_mode()
end

-- Sets all networks to cpu mode
function WordEmbedModule:set_cpu_mode()
  dmn.logger:print("Setting word embed module to cpu mode")
  self.gpu_mode = false
  self.config.gpu_mode = false
  self.embed_layer.gpu_mode = false
  self.embed_layer:set_cpu_mode()
end

function WordEmbedModule:getModules()
  return self.tot_modules
end

-- Returns flattened params/grad_params; nil until getParameters has been
-- triggered (see grad_check).
function WordEmbedModule:getParameters()
  return self.params, self.grad_params
end

function WordEmbedModule:getWeights()
  return self.params
end

-- Resets depths for lstm (no-op: no recurrent state here)
function WordEmbedModule:forget()
end

-- Pretty-prints the module configuration.
function WordEmbedModule:print_config()
  local num_params = 0 --self.params:size(1)
  printf('%-25s = %d\n', 'num params for word embed module', num_params)
  printf('%-25s = %s\n', 'gpu mode', self.config.gpu_mode)
  printf('%-25s = %s\n', 'use dropout', self.config.dropout)
  printf('%-25s = %d\n', 'embed dimension', self.config.emb_dim)
  printf('%-25s = %d\n', 'number of classes', self.config.num_classes)
  printf('%-25s = %f\n', 'input dropout probability', self.config.dropout_prob)
end
--[[
Gradient checks the noise-contrastive estimation criterion
]]

require('../../dmn')

-- small linear+softmax model whose gradients we check against NCECriterion
model = nn.Sequential()
  :add(nn.Linear(50, 50))
  :add(nn.SoftMax())

criterion = dmn.NCECriterion()

-- 20 samples of dimension 50; noise probabilities fixed at 0.001
local inputs = torch.rand(20, 50)
local sample_probs = torch.rand(20, 50):fill(0.001)

local params, grad_params = model:getParameters()
local currIndex = 0
-- feval: accumulates loss and gradients over every sample (optim-style closure)
local feval = function(x)
  grad_params:zero()
  local total_err = 0
  for i = 1, inputs:size(1) do
    local res = model:forward(inputs[i])

    -- accumulate total error
    curr_err = criterion:forward(res, sample_probs[i])
    total_err = total_err + curr_err

    input_err = criterion:backward(res, sample_probs[i])
    local input_grads = model:backward(inputs[i], input_err)
  end
  return total_err, grad_params
end

-- check gradients for lstm layer
-- NOTE(review): checkgrad uses eps 1e-4 here but the assert demands 1e-5;
-- confirm the tolerance is intentionally tighter than the perturbation.
diff, DC, DC_est = optim.checkgrad(feval, params, 1e-4)
print("Gradient error for document embed module network is")
print(diff)
assert(diff < 1e-5, "Gradient is greater than tolerance")

-- take a few SGD steps to make sure training decreases the criterion
for i = 1, 100 do
  optim.sgd(feval, params, {learningRate = 1e-1})
end

res = model:forward(inputs)

-- print first column and a slice of each row of the trained outputs
for i = 1, res:size(1) do
  print(res[i][1])
  print(res[i][{{2, 30}}])
end
results = tmp:forward(inputs) 8 | 9 | print(results) 10 | 11 | -- testing basic gru unit 12 | local gru_unit = dmn.rnn_units.gru_unit(5, 3, 2) 13 | local res = gru_unit:forward({torch.rand(5), {torch.rand(3), torch.rand(3)}}) 14 | 15 | -- testing basic lstm attention unit 16 | local in_dim = 5 17 | local mem_dim = 7 18 | local num_layers = 2 19 | local attention_lstm_unit = dmn.rnn_units.attention_lstm_unit(in_dim, mem_dim, mem_dim, num_layers, 'coarse_fixed') 20 | 21 | local input = torch.rand(in_dim) 22 | local context = torch.rand(10, mem_dim) 23 | local context_2 = torch.zeros(15, mem_dim) 24 | local prev_hidden_state = {torch.rand(mem_dim), torch.rand(mem_dim)} 25 | local prev_cell_state = {torch.rand(mem_dim), torch.rand(mem_dim)} 26 | 27 | local res = attention_lstm_unit:forward({input, context, prev_cell_state, prev_hidden_state}) 28 | 29 | local res1 = attention_lstm_unit:forward({input, context, prev_cell_state, prev_hidden_state}) 30 | local errs = attention_lstm_unit:backward({input, context, prev_cell_state, prev_hidden_state}, res1) 31 | 32 | local img = dmn.image_functions.visualize_tensor(res1[3], 25) 33 | image.save("tmp.jpg", img) 34 | 35 | print("Forwrd backward DONE") 36 | for i,node in ipairs(attention_lstm_unit.forwardnodes) do 37 | local gmnode = attention_lstm_unit.forwardnodes[i] 38 | assert(gmnode, 'trying to map another gModule with a different structure') 39 | if node.data.annotations._debugLabel == '[.../NLP/DeepLearning/softmax/dmn/models/rnn_units/units.lua]:19' then 40 | end 41 | end 42 | 43 | 44 | -- testing gradient of attention network 45 | local lstm_unit = dmn.Attention_LSTM_Decoder{ 46 | in_dim = 5, 47 | context_dim = 2, 48 | mem_dim = 2, 49 | num_layers = 1, 50 | gpu_mode = false, 51 | attention_type = 'coarse' 52 | } 53 | lstm_unit:disable_dropouts() 54 | lstm_unit:grad_check() 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /dmn/tests/batch_answer_module_test.lua: 
--[[
Checks that batch forwarding through dmn.AnswerModule matches forwarding
each sample individually (predictions, memory gradients, and average loss).
]]

require('..')

local network = dmn.AnswerModule{
  gpu_mode = false,
  num_classes = 10,
  emb_dim = 100,
  mem_dim = 100,
  num_layers = 1,
  in_dropout_prob = 0.0,
  hidden_dropout_prob = 0.0,
  dropout = false,
  rnn_type = 'gf_lstm',
  cell_type = 'gf_lstm'
}

local test_network = function(network)
  -- 10 memory vectors of dim 100; indices are 3 timesteps x 10-wide batches
  local inputs = torch.rand(10, 100)
  local input_indices = torch.IntTensor{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
  {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
  {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}
  local desired_indices = torch.IntTensor{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
  {1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
  {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}
  -- mask: per-sample (start_len, end_len) pairs -- presumably sequence
  -- lengths; TODO confirm against AnswerModule:forward
  local mask = torch.IntTensor(10, 2)

  for i = 1, mask:size(1) do
    mask[i][1] = 5
    mask[i][2] = 4
  end

  -- batched pass
  local batch_loss, predictions, batch_lstm_output = network:forward(inputs, input_indices, desired_indices, mask)
  local memory_errors = network:backward(inputs, input_indices, desired_indices, mask)

  print("PREDICTIONS ARE")
  print(predictions)

  print(memory_errors:size())

  -- per-sample pass: differences against the batched results should be ~0
  local tot_loss = 0
  for i = 1, inputs:size(1) do
    local cur_input = inputs[i]
    local cur_input_indices = torch.squeeze(input_indices[{{},i}])
    local cur_desired_indices = torch.squeeze(desired_indices[{{}, i}])
    local curr_loss, curr_predictions, cur_lstm_output = network:forward(
      cur_input,
      cur_input_indices,
      cur_desired_indices,
      mask)
    local cur_memory_errors = network:backward(
      cur_input,
      cur_input_indices,
      cur_desired_indices,
      mask)

    print("Prediction difference")
    local diff = curr_predictions - predictions[{{},i}]
    local input_mem_diff = cur_memory_errors - memory_errors[i]
    local cur_lstm_output = batch_lstm_output[{{}, i}]
    print("Input memory difference " .. torch.abs(input_mem_diff):sum())
    print(torch.abs(diff):sum())
    tot_loss = tot_loss + curr_loss
  end

  -- average per-sample loss should match the batch loss
  print(tot_loss / inputs:size(1))
  print(batch_loss)
end

test_network(network)

--network.rnn_type = 'gru'
--test_network(network)
torch.IntTensor{{1, 2, 3, 4}, 43 | {1, 4, 5, 6}, 44 | {1, 7, 8, 9}} 45 | 46 | -- input_sentences[i] = batch of indices corresponding to word i. 47 | local output_sentences = torch.IntTensor{{1, 2, 3, 4}, {1, 4, 5, 6}, {1, 7, 8, 9}} 48 | 49 | local input_masks = torch.IntTensor(4, 2) 50 | input_masks[1][1] = 5 51 | input_masks[1][2] = 3 52 | input_masks[2][1] = 5 53 | input_masks[2][2] = 2 54 | input_masks[3][1] = 5 55 | input_masks[3][2] = 3 56 | input_masks[4][1] = 5 57 | input_masks[4][2] = 3 58 | 59 | model.grad_params:zero() 60 | local batch_loss, predictions = model:forward(images, input_sentences, output_sentences, false, input_masks) 61 | local input_errs = model:backward(images, input_sentences, output_sentences, false, input_masks) 62 | local batch_sum = model.grad_params:sum() 63 | 64 | model.grad_params:zero() 65 | 66 | local cur_loss = 0 67 | -- should be the same as batch forwarding 68 | for i = 1, images:size(1) do 69 | local loss, predictions = model:forward(images[i], input_sentences[{{},i}], output_sentences[{{},i}], false, input_masks) 70 | model:backward(images[i], input_sentences[{{},i}], output_sentences[{{},i}], false, input_masks) 71 | cur_loss = cur_loss + loss 72 | end 73 | 74 | print(cur_loss / images:size(1)) 75 | print(batch_loss) 76 | 77 | local sequential_sum = model.grad_params:sum() / images:size(1) 78 | 79 | print("Batch sum, sequential sum ", batch_sum, sequential_sum) 80 | assert(batch_sum == sequential_sum, "Two must be equal") -------------------------------------------------------------------------------- /dmn/tests/batch_gf_test.lua: -------------------------------------------------------------------------------- 1 | require('..') 2 | num_layers = 1 3 | in_dim = 100 4 | mem_dim = 300 5 | local input = nn.Identity()() 6 | local htable_p = nn.Identity()() 7 | 8 | -- multilayer GRU 9 | local htable = {} 10 | for layer = 1, num_layers do 11 | -- get current inputs for the layer 12 | local curr_input_size = (layer == 1) and in_dim 
--[[
Builds a (possibly gated-feedback) GRU cell with nngraph and batch-forwards
it once, to sanity-check that the graph handles batched inputs.
]]

require('..')
num_layers = 1
in_dim = 100
mem_dim = 300
local input = nn.Identity()()
local htable_p = nn.Identity()()

-- multilayer GRU
local htable = {}
for layer = 1, num_layers do
  -- get current inputs for the layer
  local curr_input_size = (layer == 1) and in_dim or mem_dim
  -- assert(false)
  local curr_input = (layer == 1) and input or nn.Identity()(htable[layer - 1])

  -- get previous hidden state for layers
  local h_p = (num_layers == 1) and htable_p or nn.SelectTable(layer)(htable_p)

  -- standard GRU gate: W*x + U*h_prev
  local new_gate = function()
    local i2h = (layer == 1)
      and nn.Linear(in_dim, mem_dim)(input)
      or nn.Linear(mem_dim, mem_dim)(htable[layer - 1])
    local h2h = nn.Linear(mem_dim, mem_dim)(h_p)
    return nn.CAddTable()({i2h, h2h})
  end

  -- gated-feedback gate: sum over layers of g_i->j * U_i->j * h^i_t-1
  local gf_gate = function()
    local gf_layer = {}
    local concatenated_features = (num_layers == 1) and htable_p or nn.JoinTable(1){htable_p}
    for j = 1, num_layers do
      local in_module = (layer == 1)
        and nn.Linear(in_dim, mem_dim)(input)
        or nn.Linear(mem_dim, mem_dim)(htable[layer - 1])

      local hidden_concat = nn.Linear(num_layers * mem_dim, 1)(concatenated_features)
      local reset_gate = nn.Sigmoid()(nn.CAddTable(){hidden_concat, nn.Linear(in_dim, 1)(input)})

      -- replicate reset gate.
      local replicated_reset_gate = nn.Replicate(mem_dim)(reset_gate)

      -- now wrong dimensions, so transpose
      local transposed_replicated_reset_gate = dmn.Squeeze()(nn.Transpose({2, 1})(replicated_reset_gate))

      -- g_i->j * U_i->j * h^i_t-1
      local curr_sum = nn.CMulTable(){transposed_replicated_reset_gate, in_module}

      -- quirks for single-layered things
      if num_layers > 1 then
        table.insert(gf_layer, curr_sum)
      else
        gf_layer = curr_sum
      end
    end

    -- quirks for single-layered things
    local summed_gf = (num_layers == 1) and gf_layer or nn.CAddTable()(gf_layer)
    return summed_gf
  end

  -- GRU tick
  -- forward the update and reset gates
  local update_gate = nn.Sigmoid()(gf_gate())
  local reset_gate = nn.Sigmoid()(new_gate())
  -- compute candidate hidden state
  local gated_hidden = nn.CMulTable()({reset_gate, h_p})
  local p2 = nn.Linear(mem_dim, mem_dim)(gated_hidden)
  local p1 = nn.Linear(curr_input_size, mem_dim)(curr_input)
  local hidden_candidate = nn.Tanh()(nn.CAddTable()({p1,p2}))
  -- compute new interpolated hidden state, based on the update gate
  local zh = nn.CMulTable()({update_gate, hidden_candidate})
  local zhm1 = nn.CMulTable()({nn.AddConstant(1,false)(nn.MulConstant(-1,false)(update_gate)), h_p})
  local next_h = nn.CAddTable()({zh, zhm1})
  -- BUG FIX: the layer output is the interpolated hidden state next_h;
  -- the original stored the raw update_gate and discarded next_h entirely.
  htable[layer] = next_h
end

-- if GRU is single-layered, this makes htable/ctable Tensors (instead of tables).
-- this avoids some quirks with nngraph involving tables of size 1.
htable = nn.Identity()(htable)
local cell = nn.gModule({input, htable_p}, {htable})

local res = cell:forward({torch.rand(50, 100), torch.rand(50, 300)})

print(res)
--[[
Batch forwarding LSTMs test
Verifies that batched forward/backward through LSTM_Decoder / GRU_Decoder
matches forwarding each batch element individually (outputs, input grads,
and hidden-state grads).
]]

require('..')

local config = {
  in_dim = 5,
  mem_dim = 10,
  num_layers = 1,
  gpu_mode = false,
  dropout = false,
  lstm_type = 'lstm',
  gru_type = 'gru'
}

local test_network = function(network, network_type)
  -- LSTM state is a {cell, hidden} pair; GRU state is a single tensor
  local hidden_batch_inputs = network_type == 'lstm' and {torch.rand(8, 10), torch.rand(8, 10)}
    or torch.rand(8, 10)
  -- 3 timesteps x batch of 8 x in_dim 5
  local batch_inputs = torch.rand(3, 8, 5)

  -- batched forward/backward, timed
  local t1 = sys.clock()
  results = network:forward(batch_inputs, hidden_batch_inputs, false)
  local rand_outputs = results:clone():fill(0.5)

  results1, results2 = network:backward(batch_inputs, hidden_batch_inputs, false, rand_outputs)
  local t2 = sys.clock()

  --print(results:size())
  print((t2 - t1) / 100)

  -- per-element pass: every printed difference should be ~0
  local t1 = sys.clock()
  for i = 1, batch_inputs:size(2) do
    local cur_input = torch.squeeze(batch_inputs[{{},i}])
    local cur_hidden_input = network_type == 'lstm' and {hidden_batch_inputs[1][i], hidden_batch_inputs[2][i]}
      or hidden_batch_inputs[i]
    local cur_outputs = torch.squeeze(rand_outputs[{{}, i}])
    results_single = network:forward(cur_input, cur_hidden_input, false)
    results1_single, results2_single = network:backward(cur_input, cur_hidden_input, false, cur_outputs)
    local diff = results_single - torch.squeeze(results[{{},i}])
    local grad_diff = results1_single - torch.squeeze(results1[{{}, i}])

    local hidden_grad_diff1, hidden_grad_diff2
    if network_type == 'lstm' then
      -- LSTM: compare cell and hidden grads separately
      hidden_grad_diff1 = results2_single[1]
        - torch.squeeze(results2[1][i])
      hidden_grad_diff2 = results2_single[2]
        - torch.squeeze(results2[2][i])
    else
      -- GRU: only one hidden state, reuse the same diff for both printouts
      hidden_grad_diff1 = results2_single - results2[i]
      hidden_grad_diff2 = hidden_grad_diff1
    end

    print("Difference in results " .. i .. " " .. torch.abs(diff):sum())
    print("Difference in grads " .. i .. " " .. torch.abs(grad_diff):sum())
    print("Difference in cell grads " .. i .. " " .. torch.abs(hidden_grad_diff1):sum())
    print("Difference in hidden grads " .. i .. " " .. torch.abs(hidden_grad_diff2):sum())
  end
end

print("Testing lstms")
-- lstm
local lstm_decoder_network = dmn.LSTM_Decoder(config)
local gru_decoder_network = dmn.GRU_Decoder(config)

print("Testing lstm decoder with regular lstm")
test_network(lstm_decoder_network, 'lstm')

print("Testing gru decoder with regular gru")
test_network(gru_decoder_network, 'gru')

-- repeat with gated-feedback variants
config.lstm_type = 'gf_lstm'
config.gru_type = 'gf_gru'
local lstm_decoder_network = dmn.LSTM_Decoder(config)
local gru_decoder_network = dmn.GRU_Decoder(config)

print("Testing lstm decoder with gf lstm")
test_network(lstm_decoder_network, 'lstm')

print("Testing gru decoder with gf gru")
test_network(gru_decoder_network, 'gru')
--[[
Batch forwarding LSTMs test
Hand-builds a gated-feedback LSTM cell with nngraph and forwards one
batch through it to check the graph wiring.
]]

require('..')

num_layers = 1
input_size = 100
mem_dim = 100
local input = nn.Identity()()
local ctable_p = nn.Identity()()
local htable_p = nn.Identity()()

-- multilayer LSTM
local htable, ctable = {}, {}
for layer = 1, num_layers do
  local h_p = (num_layers == 1) and htable_p or nn.SelectTable(layer)(htable_p)
  local c_p = (num_layers == 1) and ctable_p or nn.SelectTable(layer)(ctable_p)

  -- standard LSTM gate: W*x + U*h_prev
  local new_gate = function()
    local in_module = (layer == 1)
      and nn.Linear(input_size, mem_dim)(input)
      or nn.Linear(mem_dim, mem_dim)(htable[layer - 1])
    return nn.CAddTable(){
      in_module,
      nn.Linear(mem_dim , mem_dim)(h_p)
    }
  end

  -- gated-feedback gate: sum over layers of g_i->j * U_i->j * h^i_t-1
  local gf_gate = function()
    -- get input module
    -- U_i->j*h^i_t-1
    local gf_layer = {}
    local concatenated_features = (num_layers == 1) and htable_p or nn.JoinTable(1){htable_p}
    for j = 1, num_layers do
      local in_module = (layer == 1)
        and nn.Linear(input_size, mem_dim)(input)
        or nn.Linear(mem_dim, mem_dim)(htable[layer - 1])

      local hidden_concat = nn.Linear(num_layers * mem_dim, 1)(concatenated_features)
      local reset_gate = nn.Sigmoid()(nn.CAddTable(){hidden_concat, nn.Linear(input_size, 1)(input)})

      -- replicate reset gate
      local replicated_reset_gate = dmn.Squeeze()(nn.Replicate(mem_dim)(reset_gate))

      -- g_i->j * U_i->j * h^i_t-1
      local curr_sum = nn.CMulTable(){replicated_reset_gate, in_module}

      -- quirks for single-layered things
      if num_layers > 1 then
        table.insert(gf_layer, curr_sum)
      else
        gf_layer = curr_sum
      end
    end

    -- quirks for single-layered things
    local summed_gf = (num_layers == 1) and nn.Identity()(gf_layer) or nn.CAddTable()(gf_layer)
    return summed_gf
  end

  -- input, forget, and output gates
  local i = nn.Sigmoid()(new_gate())
  local f = nn.Sigmoid()(new_gate())

  -- gated feedback update
  local update = nn.Tanh()(gf_gate())

  -- update the state of the LSTM cell
  ctable[layer] = nn.CAddTable(){
    nn.CMulTable(){f, c_p},
    nn.CMulTable(){i, update}
  }

  -- output gate
  local o = nn.Sigmoid()(new_gate())
  htable[layer] = nn.CMulTable(){o, nn.Tanh()(ctable[layer])}
end

-- if LSTM is single-layered, this makes htable/ctable Tensors (instead of tables).
-- this avoids some quirks with nngraph involving tables of size 1.
htable, ctable = nn.Identity()(htable), nn.Identity()(ctable)
local cell = nn.gModule({input, ctable_p, htable_p}, {ctable, htable})

-- batch of 3: input, previous cell state, previous hidden state
inputs = torch.rand(3, 100)
local new_inputs = {inputs, torch.rand(3, 100), torch.rand(3, 100)}
results = cell:forward(new_inputs)
print(results)
24 | 25 | local t1 = sys.clock() 26 | results = network:forward(torch.rand(200, 2, 100), hidden_batch_inputs, false) 27 | results1 = network:backward(torch.rand(200, 2, 100), hidden_batch_inputs, false, results) 28 | local t2 = sys.clock() 29 | 30 | --print(results:size()) 31 | print((t2 - t1) / 100) 32 | 33 | local t1 = sys.clock() 34 | results_single = network:forward(torch.rand(200, 100), hidden_single_inputs, false) 35 | results1_single = network:backward(torch.rand(200, 100), hidden_single_inputs, false, results) 36 | 37 | 38 | end 39 | 40 | print("Testing lstms") 41 | -- lstm 42 | local lstm_encoder_network = dmn.LSTM_Encoder(config) 43 | local lstm_decoder_network = dmn.LSTM_Decoder(config) 44 | 45 | print("Testing lstm encoder with regular lstm") 46 | test_network(lstm_encoder_network, 'lstm') 47 | 48 | print("Testing lstm decoder with regular lstm") 49 | test_network(lstm_decoder_network, 'lstm') 50 | 51 | -- gf_lstm 52 | config.lstm_type = 'gf_lstm' 53 | 54 | local lstm_decoder_network = dmn.LSTM_Decoder(config) 55 | local lstm_encoder_network = dmn.LSTM_Encoder(config) 56 | 57 | print("Testing encoder LSTM network with gated-feedback lstms") 58 | --test_network(lstm_encoder_network, 'lstm') 59 | 60 | print("Testing decoder LSTM network with gated-feedback lstms") 61 | --test_network(lstm_decoder_network, 'lstm') 62 | 63 | print("Testing grus") 64 | -- gru 65 | local gru_decoder_network = dmn.GRU_Decoder(config) 66 | local gru_encoder_network = dmn.GRU_Encoder(config) 67 | 68 | print("Testing encoder GRU network with regular gru") 69 | --test_network(gru_encoder_network, 'gru') 70 | 71 | print("Testing decoder GRU network with regular gru") 72 | --test_network(gru_decoder_network, 'gru') 73 | 74 | config.gru_type = 'gf_gru' 75 | 76 | -- gru 77 | local gru_decoder_network = dmn.GRU_Decoder(config) 78 | local gru_encoder_network = dmn.GRU_Encoder(config) 79 | 80 | print("Testing encoder GRU network with gated-feedback gru") 81 | 
--[[
Checks that a BatchReshape -> Linear -> LogSoftMax pipeline with a flattened
ClassNLLCriterion gives the same gradients as forwarding each batch column
separately.
]]

require('..')

local network = nn.Sequential()
  :add(dmn.BatchReshape())
  :add(nn.Linear(500, 20))
  :add(nn.LogSoftMax())

-- 3 sequences x 5 timesteps x 500 features
local inputs = torch.rand(3, 5, 500)
local results = network:forward(inputs)

print(results)
local labels = torch.IntTensor{{1, 2, 3, 4, 5},
  {1, 2, 3, 4, 5}, {1, 2, 3, 4, 5}}

-- flatten labels to line up with the BatchReshape'd (15 x 20) output
local reshaped_labels = labels:view(15)
print(reshaped_labels)
print(results)
local criterion = nn.ClassNLLCriterion()

local err = criterion:forward(results, reshaped_labels)
local input_err = criterion:backward(results, reshaped_labels)
local input_grads = network:backward(inputs, input_err)

print(input_grads)

-- per-column pass: grad_diff printed below should be ~0 for each column
for i = 1, inputs:size(2) do
  local cur_input = inputs[{{}, i}]
  local cur_label = labels[{{}, i}]

  local single_res = network:forward(cur_input)
  local single_err = criterion:forward(single_res, cur_label)

  local cur_input_err = criterion:backward(single_res, cur_label)
  local cur_input_grads = network:backward(cur_input, cur_input_err)

  local grad_diff = cur_input_grads - input_grads[{{}, i}]
  print(cur_input_grads:cdiv(input_grads[{{}, i}]))
  print(torch.abs(grad_diff):sum())
end
--[[
Builds a coarse (soft) attention graph with nngraph and forwards a random
context/hidden-state pair through it, printing the attention weights,
their replicated form, and the attention-weighted context sum.
]]

require('..')

-- creates soft attention
local context_size = 2
local rnn_size = 2
local num_layers = 1
-- (removed a duplicate `local context_size = 2` declaration and two unused
-- graph nodes, `input` and `ctable_p`, that were never wired into the gModule)

local htable_p = nn.Identity()()

local context = nn.Identity()()
local first_h_layer = (num_layers == 1) and htable_p or nn.SelectTable(1)(htable_p)
-- Attention is softmax(e_ij) where e_ij = va^T * tanh(Wa*S_i-1 + U_a * h_j)
local perceptroned_context = nn.Linear(context_size, rnn_size)(context)
local added_context = nn.Tanh()(dmn.CRowAddTable(){perceptroned_context, nn.Linear(rnn_size, rnn_size)(first_h_layer)})
local soft_attention = nn.SoftMax()(dmn.Squeeze()(nn.Linear(rnn_size, 1)(added_context)))
local replicated_attention = nn.Replicate(context_size, 2)(soft_attention)
local summed_context = nn.Sum()(nn.CMulTable(){replicated_attention, context})

net = nn.gModule({context, htable_p}, {soft_attention, replicated_attention, summed_context})

-- 3 context rows of size 2, hidden state of size 2
local res = net:forward({torch.rand(3, 2), torch.rand(2)})
print(res[1])
print(res[2])
print(res[3])
context = "The cat ran over the board" 9 | beam_size = 1 10 | 11 | model:save("test.th", 1) 12 | new_model = dmn.Context_Captioner_Network.load("test.th") 13 | new_model:set_gpu_mode() 14 | results = model:predict(img, context, beam_size) 15 | 16 | new_results = new_model:predict(img, context, beam_size) 17 | 18 | img_embed_diff = new_model.image_embed_layer:forward(img) 19 | img_embed_prev = model.image_embed_layer:forward(img) 20 | 21 | context = torch.CudaTensor{1, 2, 3, 4, 5} 22 | 23 | loss1, class_predictions1 = new_model.answer_layer:forward(img_embed_diff, context, context, context) 24 | loss, class_predictions = model.answer_layer:forward(img_embed_prev, context, context, context) 25 | 26 | cur_params = model.params 27 | new_params = new_model.params 28 | 29 | local diff = cur_params-new_params 30 | print("Params diff") 31 | print(diff:sum()) 32 | print(loss) 33 | print(loss1) 34 | print(class_predictions1) 35 | print(class_predictions) 36 | 37 | diff = img_embed_diff - img_embed_prev 38 | print(diff:sum()) 39 | print(results) -------------------------------------------------------------------------------- /dmn/tests/dataset_test.lua: -------------------------------------------------------------------------------- 1 | require('..') 2 | dummy_path = 'data/QA/vocab.txt' 3 | vocab_path = 'data/QA/inputs.txt' 4 | 5 | dataset = dmn.read_dataset('data/QA') 6 | print(dataset) -------------------------------------------------------------------------------- /dmn/tests/dmn_predict_test.lua: -------------------------------------------------------------------------------- 1 | require('..') 2 | local model_save_path = "trained_models/DMN_Network_40.th" 3 | local model = dmn.DMN_Network.load(model_save_path) 4 | local prediction = model:predict("In French?", "The answer is far from obvious", 1) 5 | print(prediction) -------------------------------------------------------------------------------- /dmn/tests/dmn_unit_test.lua: 
-------------------------------------------------------------------------------- 1 | require('..') 2 | 3 | -- Testing new dmn unit 4 | local input_size = 100 5 | local gate_size = 50 6 | local mem_size = 100 7 | test_unit = dmn.rnn_units.dmn_unit_new(input_size, mem_size, gate_size) 8 | 9 | local input = torch.rand(mem_size) 10 | local h_prev = torch.rand(mem_size) 11 | local mem = torch.rand(mem_size) 12 | local question = torch.rand(mem_size) 13 | 14 | res = test_unit:forward({input, mem, question}) 15 | print(res) 16 | 17 | -- Testing previous attention units 18 | local input_size = 100 19 | local gate_size = 50 20 | local mem_size = 100 21 | test_unit = dmn.rnn_units.dmn_unit(input_size, mem_size, gate_size) 22 | 23 | local input = torch.rand(mem_size) 24 | local h_prev = torch.rand(mem_size) 25 | local mem = torch.rand(mem_size) 26 | local question = torch.rand(mem_size) 27 | 28 | res = test_unit:forward({input, h_prev, mem, question}) 29 | 30 | print(res) 31 | local err = test_unit:backward({input, h_prev, mem, question}, res) 32 | print("PRINTING ERROR") 33 | print(err) 34 | memory_module = dmn.EpisodicMemory{ 35 | mem_dim = 100, 36 | num_episodes = 10, 37 | gpu_mode = false, 38 | gate_size = 50 39 | } 40 | 41 | -- Fact candidates 42 | local mem_state = torch.rand(100) 43 | local inputs = torch.rand(5, 100) 44 | local question_state = torch.rand(100) 45 | local reverse = false 46 | 47 | local memory = memory_module:forward(inputs, mem_state, question_state, reverse) 48 | 49 | local input_err, mem_err, question_err = 50 | memory_module:backward(inputs, mem_state, question_state, reverse, memory) -------------------------------------------------------------------------------- /dmn/tests/equality_test.lua: -------------------------------------------------------------------------------- 1 | require('..') 2 | 3 | -- test tensors 4 | local first_tensor = torch.rand(5) 5 | local second_tensor = torch.rand(5) 6 | 7 | local test_true = 
dmn.math_functions.equals(first_tensor, first_tensor:clone()) 8 | local test_false = dmn.math_functions.equals(first_tensor, torch.rand(5):zero()) 9 | 10 | assert(test_true) 11 | assert(not test_false) 12 | 13 | -- test tables 14 | local test_true1 = dmn.math_functions.equals({first_tensor}, {first_tensor:clone()}) 15 | local test_false1 = dmn.math_functions.equals({first_tensor, first_tensor}, {first_tensor:clone()}) 16 | local test_false2 = dmn.math_functions.equals({first_tensor}, {first_tensor:clone(), first_tensor:clone()}) 17 | 18 | assert(test_true1) 19 | assert(not test_false1) 20 | assert(not test_false2) 21 | 22 | local test_true2 = dmn.math_functions.equals({first_tensor, second_tensor}, {first_tensor:clone(), second_tensor:clone()}) 23 | local test_false3 = dmn.math_functions.equals({first_tensor, second_tensor}, {first_tensor:clone(), first_tensor:clone()}) 24 | 25 | assert(test_true2) 26 | assert(not test_false3) -------------------------------------------------------------------------------- /dmn/tests/gf_rnn_test.lua: -------------------------------------------------------------------------------- 1 | require('..') 2 | 3 | -- code to test basic forwarding of rnn units 4 | function test_unit(input_size, mem_size, num_layers) 5 | test_lstm_unit = dmn.rnn_units.gf_lstm(input_size, mem_size, num_layers) 6 | test_gru_unit = dmn.rnn_units.gf_gru_unit(input_size, mem_size, num_layers) 7 | 8 | graph.dot(test_lstm_unit.fg, 'MLP', 'myMLP') 9 | 10 | local input = torch.rand(input_size) 11 | 12 | local h_prev 13 | local c_prev 14 | 15 | if num_layers == 1 then 16 | h_prev = torch.rand(mem_size) 17 | c_prev = torch.rand(mem_size) 18 | else 19 | h_prev = {} 20 | c_prev = {} 21 | for i = 1, num_layers do 22 | table.insert(h_prev, torch.rand(mem_size)) 23 | table.insert(c_prev, torch.rand(mem_size)) 24 | end 25 | end 26 | local gru_res = test_gru_unit:forward({input, h_prev}) 27 | local lstm_res = test_lstm_unit:forward({input, c_prev, h_prev}) 28 | 29 | 
print("LSTM RESULT") 30 | print(lstm_res) 31 | print("GRU RESULT") 32 | print(gru_res) 33 | end 34 | 35 | -- try one layer 36 | test_unit(50, 10, 1) 37 | 38 | -- try two layers 39 | test_unit(50, 10, 2) 40 | 41 | -- try 10 layers 42 | test_unit(50, 10, 5) 43 | 44 | 45 | -------------------------------------------------------------------------------- /dmn/tests/grad_test.lua: -------------------------------------------------------------------------------- 1 | require('..') 2 | 3 | document_embed_module = dmn.DocumentEmbedModule{ 4 | gpu_mode = false, 5 | emb_dim = 5, 6 | num_classes = 10, 7 | dropout_prob = 0.5, 8 | mem_dim = 3, 9 | num_layers = 5, 10 | dropout = false 11 | } 12 | 13 | -- single layered gru answer module 14 | single_layer_gru_answer_module = dmn.AnswerModule{ 15 | dropout = false, 16 | in_dropout_prob = 0.0, 17 | hidden_dropout_prob = 0.0, 18 | gpu_mode = false, 19 | num_classes = 30, 20 | emb_dim = 5, 21 | input_dim = 2, 22 | mem_dim = 3, 23 | num_layers = 1 24 | } 25 | 26 | -- multilayered gru answer module 27 | multi_layer_gru_answer_module = dmn.AnswerModule{ 28 | dropout = false, 29 | in_dropout_prob = 0.0, 30 | hidden_dropout_prob = 0.0, 31 | gpu_mode = false, 32 | num_classes = 30, 33 | emb_dim = 5, 34 | input_dim = 2, 35 | mem_dim = 3, 36 | num_layers = 10 37 | } 38 | 39 | -- singlelayered question module 40 | single_layer_question_module = dmn.QuestionModule{ 41 | dropout = false, 42 | dropout_prob = 0.0, 43 | gpu_mode = false, 44 | num_classes = 1000, 45 | emb_dim = 2, 46 | mem_dim = 3, 47 | num_layers = 1 48 | } 49 | 50 | -- singlelayered question module 51 | multi_layer_question_module = dmn.QuestionModule{ 52 | dropout = false, 53 | dropout_prob = 0.0, 54 | gpu_mode = false, 55 | num_classes = 1000, 56 | emb_dim = 2, 57 | mem_dim = 3, 58 | num_layers = 10 59 | } 60 | 61 | document_embed_module:grad_check() 62 | single_layer_gru_answer_module:grad_check() 63 | single_layer_question_module:grad_check() 64 | 
multi_layer_question_module:grad_check() 65 | multi_layer_gru_answer_module:grad_check() 66 | 67 | 68 | -------------------------------------------------------------------------------- /dmn/tests/grayscale.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidgolub/SimpleQA/f072f87185c22a815c5abef3d0ff28eeac129119/dmn/tests/grayscale.jpg -------------------------------------------------------------------------------- /dmn/tests/gru_test.lua: -------------------------------------------------------------------------------- 1 | require('..') 2 | 3 | gru_decoder_model = dmn.GRU_Decoder{ 4 | gpu_mode = false, 5 | in_dim = 150, 6 | mem_dim = 100, 7 | dropout_prob = 0.5, 8 | dropout = 0.5, 9 | num_layers = 1 10 | } 11 | 12 | gru_encoder_model = dmn.GRU_Encoder{ 13 | gpu_mode = false, 14 | in_dim = 150, 15 | mem_dim = 100, 16 | dropout_prob = 0.5, 17 | dropout = 0.5, 18 | num_layers = 1 19 | } 20 | 21 | gru_answer_module = dmn.AnswerModule{ 22 | dropout = false, 23 | in_dropout_prob = 0.0, 24 | hidden_dropout_prob = 0.0, 25 | gpu_mode = false, 26 | num_classes = 1000, 27 | emb_dim = 100, 28 | input_dim = 100, 29 | mem_dim = 150, 30 | num_layers = 1 31 | } 32 | 33 | input = torch.IntTensor{1, 2, 3, 4, 5,} 34 | output = torch.IntTensor{5, 3, 2, 1, 6} 35 | 36 | input_decoder = torch.rand(5, 100) 37 | memory = torch.rand(150) 38 | --decoder_results = gru_decoder_model:forward(input_decoder, memory, false) 39 | 40 | err = gru_answer_module:forward(memory, input, output) 41 | bprop = gru_answer_module:backward(memory, input, output) 42 | print(err) 43 | print(bprop) 44 | -------------------------------------------------------------------------------- /dmn/tests/image_loader_test.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Tests image loader test. 
Makes sure grayscale images are loaded correctly 3 | Assumes it's executed from the python directory 4 | ]] 5 | 6 | require('..') 7 | black_and_white_im_path = 'tests/grayscale.jpg' 8 | 9 | local first_img = image.load(black_and_white_im_path) 10 | local second_img = dmn.image_functions.python_load_image(black_and_white_im_path) 11 | 12 | assert(first_img:size() == second_img:size(), "First image and second image dim must match") 13 | 14 | -- then test super super large images 15 | local large_img_path = '../datasets/Captioning/context/images/Indianapolis_in_1831.png' 16 | large_img = dmn.image_functions.python_load_image(large_img_path) 17 | 18 | 19 | -- Python code 20 | import PIL.Image as Image 21 | import numpy 22 | 23 | tmp = Image.open('../datasets/Captioning/context/images/Indianapolis_in_1831.png') 24 | arr = numpy.array(tmp.size) 25 | tmp.putdata(arr) 26 | 27 | 28 | -------------------------------------------------------------------------------- /dmn/tests/image_statistics_test.lua: -------------------------------------------------------------------------------- 1 | require('..') 2 | 3 | local img_path = '../datasets/ImageClassification/cifar10/raw/image_paths.txt' 4 | local tot_img_paths = datasets.read_line_data(img_path) 5 | 6 | local mean, std = dmn.image_functions.compute_mean_std_image_list(tot_img_paths, 10000) 7 | print(mean) 8 | print(std) -------------------------------------------------------------------------------- /dmn/tests/kl_div_test.lua: -------------------------------------------------------------------------------- 1 | require('nn') 2 | 3 | -- KL Divergence test. Turns out target needs to be a valid probability distribution, not a LOG of one. 
4 | input = torch.rand(5) 5 | 6 | net = nn.Sequential() 7 | :add(nn.Linear(5, 2)) 8 | :add(nn.LogSoftMax()) 9 | 10 | probabilizer = nn.SoftMax() 11 | log_probabilizzer = nn.LogSoftMax() 12 | 13 | criterion = nn.DistKLDivCriterion() 14 | 15 | res = net:forward(input) 16 | desired = probabilizer:forward(torch.rand(2)) 17 | log_desired = log_probabilizzer:forward(torch.rand(2)) 18 | 19 | err = criterion:forward(res, desired) 20 | err1 = criterion:forward(res, log_desired) 21 | 22 | print(err) 23 | print(err1) 24 | -------------------------------------------------------------------------------- /dmn/tests/load_dssm_test.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | DSSM testing scripts: makes sure that loaded model gives exactly same predictions as trained model 4 | 5 | --]] 6 | 7 | 8 | require('..') 9 | 10 | dmn.logger:header("Testing loading/saving DSSM Network") 11 | 12 | local dataset_path, model_params, dir_params = unpack(require('opts/dssm_opts.lua')) 13 | 14 | if model_params.use_gpu_mode then 15 | dmn.logger:print("Loading gpu modules") 16 | require('cutorch') -- uncomment for GPU mode 17 | require('cunn') -- uncomment for GPU mode 18 | end 19 | 20 | -- Create random model and 21 | local dssm_trainer = softmax.DSSM_Trainer() 22 | local predicate_vocab = {size = 2000, hashed = false} 23 | local entity_vocab = {size = 2000, hashed = false} 24 | local model = dssm_trainer:load_model(model_params, predicate_vocab, entity_vocab) 25 | 26 | local model_save_path = 'dummy_model.th' 27 | model:save(model_save_path, model_params.epochs) 28 | 29 | local loaded_model = dmn.DSSM_Network.load(model_save_path) 30 | local question = torch.IntTensor{1, 2, 3, 6, 7, 2, 3, 120, 293, 120, 239, 491, 230, 129, 203, 45, 345,} 31 | local predicates = {torch.IntTensor{1, 2, 623, 102, 239, 2, 3, 120, 293, 120, 239, 491}, 32 | torch.IntTensor{5, 6, 8, 9, 10, 491, 230, 129, 203, 45, 345}, torch.IntTensor{45, 233, 121, 1234, 33 | 3, 
6, 7, 2, 3, 120, 293, }} 34 | 35 | local results = model:predict_tokenized(question, predicates, #predicates) 36 | local loaded_results = loaded_model:predict_tokenized(question, predicates, #predicates) 37 | 38 | for i = 1, #results do 39 | local model_result = results[i][1] 40 | local loaded_model_result = loaded_results[i][1] 41 | dmn.logger:print(model_result .. " " .. loaded_model_result) 42 | assert(model_result == loaded_model_result, "Model results must match") 43 | end 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /dmn/tests/load_qa_test.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Run loading QA model tests 3 | ]] 4 | 5 | require('..') 6 | 7 | -- Tests gpus 8 | gpu_test = {} 9 | 10 | tester = torch.Tester() 11 | answer_vocab = {} 12 | question_vocab = {} 13 | input_vocab = {} 14 | 15 | answer_vocab.hashed = false 16 | answer_vocab.size = 25000 17 | question_vocab.hashed = false 18 | question_vocab.size = 25000 19 | input_vocab.hashed = false 20 | input_vocab.size = 25000 21 | 22 | function gpu_test.Test_Attention() 23 | 24 | local config = { 25 | optim_state = {learningRate = 1e-4}, 26 | optim_method_string = "rmsprop", 27 | gpu_mode = false, 28 | e_vocab = input_vocab, 29 | p_vocab = answer_vocab, 30 | q_vocab = question_vocab, 31 | batch_size = 100, 32 | attention_mem_dim = 100, 33 | attention_num_layers = 1, 34 | question_emb_dim = 200, 35 | question_mem_dim = 100, 36 | question_num_layers = 2, 37 | question_num_classes = question_vocab.size, 38 | question_dropout_prob = 0.5, 39 | question_dropout = false, 40 | entity_emb_dim = 300,--00, 41 | entity_out_dim = 100,--0, 42 | entity_hidden_dim = 200,--0, 43 | entity_in_stride = 1, 44 | entity_in_kernel_width = 2, 45 | entity_hidden_kernel_width = 2, 46 | entity_hidden_stride = 1, 47 | entity_out_kernel_width = 1, 48 | entity_out_stride = 1, 49 | entity_num_classes = 
input_vocab.size, 50 | entity_dropout_prob = 0.5, 51 | entity_dropout = false, 52 | predicate_emb_dim = 300, 53 | predicate_out_dim = 100, 54 | predicate_hidden_dim = 200, 55 | predicate_in_stride = 1, 56 | predicate_in_kernel_width = 3, 57 | predicate_hidden_kernel_width = 2, 58 | predicate_hidden_stride = 1, 59 | predicate_out_kernel_width = 2, 60 | predicate_out_stride = 1, 61 | predicate_num_classes = question_vocab.size, 62 | predicate_dropout_prob = 0.5, 63 | predicate_dropout = false} 64 | 65 | 66 | -- creates our model 67 | local cpu_model = dmn.Attention_Network(config) 68 | cpu_model:save("test.th") 69 | local loaded_model = dmn.Attention_Network.load("test.th") 70 | 71 | local function forward_inputs(model, question_indices, predicate_indices, entity_indices, corr_entity_index, corr_predicate_index) 72 | output = model:forward(question_indices, predicate_indices, entity_indices, corr_entity_index, corr_predicate_index) 73 | return output 74 | end 75 | 76 | -- Test gpu inputs 77 | -- Create inputs 78 | local question_indices = torch.IntTensor(30):random(5, 5000) 79 | 80 | local table_word_indices = {} 81 | for i = 1, 10 do 82 | local word_indices = torch.IntTensor(100):random(5, 5000) 83 | table.insert(table_word_indices, word_indices) 84 | end 85 | local correct_index = 1 86 | 87 | -- Load/save model 88 | local original_model_res = forward_inputs(cpu_model, question_indices, table_word_indices, table_word_indices, correct_index, correct_index) 89 | local loaded_model_res = forward_inputs(loaded_model, question_indices, table_word_indices, table_word_indices, correct_index, correct_index) 90 | 91 | print(original_model_res) 92 | print(loaded_model_res) 93 | 94 | tester:assertlt(torch.abs(original_model_res - loaded_model_res), 1e-7, 'Difference between outputs must be less than 1e-7') 95 | end 96 | 97 | 98 | tester:add(gpu_test) 99 | tester:run() 100 | 101 | -------------------------------------------------------------------------------- 
/dmn/tests/mem_network.lua: -------------------------------------------------------------------------------- 1 | require('.') 2 | 3 | local vocab_size = 30 4 | 5 | local dmn_network = dmn.DMN_Network{ 6 | gpu_mode = false, 7 | question_num_classes = vocab_size, 8 | question_emb_dim = 2, 9 | question_in_dim = 2, 10 | question_mem_dim = 5, 11 | question_num_layers = 1, 12 | question_dropout_prob = 0.5, 13 | question_dropout = false, 14 | answer_num_classes = vocab_size, 15 | answer_emb_dim = 2, 16 | answer_input_dim = 2, 17 | answer_mem_dim = 5, 18 | answer_num_layers = 1, 19 | answer_in_dropout_prob = 0.5, 20 | answer_hidden_dropout_prob = 0.5, 21 | answer_dropout = false, 22 | episodic_mem_dim = 5, 23 | episodic_gate_size = 5, 24 | episodic_num_episodes = 5, 25 | semantic_num_classes = vocab_size, 26 | semantic_emb_dim = 2, 27 | semantic_in_dim = 2, 28 | semantic_mem_dim = 5, 29 | semantic_num_layers = 1, 30 | semantic_dropout_prob = 0.5, 31 | semantic_dropout = false 32 | } 33 | 34 | local question_indices = torch.IntTensor{1, 2, 3, 4, 8, 9, 10, 11, 12, 15, 29} 35 | local word_indices = torch.IntTensor{1, 2, 3, 4, 8, 9, 10, 11, 12, 15, 18} 36 | local input_indices = torch.IntTensor{1, 2, 3, 4, 8, 9, 10, 11, 12, 15, 27} 37 | local output_indices = torch.IntTensor{1, 2, 3, 4, 8, 9, 10, 11, 12, 15, 20} 38 | 39 | print("Forwarding network") 40 | local start_time = sys.clock() 41 | local err = dmn_network:forward(question_indices, word_indices, input_indices, output_indices) 42 | local err1 = dmn_network:backward(question_indices, word_indices, input_indices, output_indices) 43 | local end_time = sys.clock() 44 | print(start_time - end_time) 45 | print("Done forwarding network") 46 | 47 | dmn_network:grad_check() 48 | 49 | -------------------------------------------------------------------------------- /dmn/tests/padding_test.lua: -------------------------------------------------------------------------------- 1 | require('..') 2 | 3 | local paddedJoinTable = 
dmn.PaddedJoinTable(0) 4 | local inputs = {torch.rand(5), torch.rand(2)} 5 | local desired = torch.rand(2, 5) 6 | 7 | local res = paddedJoinTable:forward(inputs) 8 | local back = paddedJoinTable:backward(inputs, res) 9 | 10 | print(res) 11 | print(back) 12 | --[[local params, grad_params = paddedJoinTable:getParameters() 13 | local currIndex = 0 14 | local loss_function = nn.MSECriterion() 15 | 16 | local feval = function(x) 17 | grad_params:zero() 18 | local res = paddedJoinTable:forward(inputs) 19 | local err = loss_function:forward(res, desired) 20 | local err1 = loss_function:backward(res, desired) 21 | local input_errs = paddedJoinTable:backward(inputs, err1) 22 | 23 | currIndex = currIndex + 1 24 | print(currIndex, " of ", params:size()) 25 | print(loss) 26 | return loss, grad_params 27 | end 28 | 29 | -- check gradients for lstm layer 30 | diff, DC, DC_est = optim.checkgrad(feval, params, 1e-7) 31 | print("Gradient error for dmn network is") 32 | print(diff) 33 | assert(diff < 1e-5, "Gradient is greater than tolerance") 34 | ]] 35 | -------------------------------------------------------------------------------- /dmn/tests/parallelApply.lua: -------------------------------------------------------------------------------- 1 | require('nn') 2 | 3 | local inputs = torch.rand(5, 3, 2) 4 | local mask = 5 | 6 | --module = nn.ParallelTable() 7 | -------------------------------------------------------------------------------- /dmn/tests/probability_interpolation_test.lua: -------------------------------------------------------------------------------- 1 | require('..') 2 | 3 | -- Tests that probability interpolations work as expected 4 | 5 | local input = torch.DoubleTensor{0.1,0.2,0.3,0.4} 6 | local desired_class = torch.IntTensor{1} 7 | 8 | local first_interpolation = dmn.math_functions.probability_interpolation(desired_class, 9 | input, 10 | -1, 11 | 1.0) 12 | 13 | print(first_interpolation) 14 | assert(first_interpolation[1] == 1) 15 | 16 | local 
second_interpolation = dmn.math_functions.probability_interpolation(desired_class, 17 | input, 18 | 4, 19 | 0.0) 20 | 21 | print(second_interpolation) 22 | 23 | local third_interpolation = dmn.math_functions.probability_interpolation(desired_class, 24 | input, 25 | 2, 26 | 0.5) 27 | 28 | print(third_interpolation) -------------------------------------------------------------------------------- /dmn/tests/qa_attention_test.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Tests qa attention model: that attention it gives out is legit 3 | ]] 4 | 5 | require('..') 6 | 7 | 8 | local cur_model = dmn.Attention_Network.load('test_model.th') 9 | 10 | local question = "When was foo born" 11 | local entities = {"FOO", "bar"} 12 | local predicates = {"people/person/location", "people/person/born_in"} 13 | 14 | local rankings, likelihoods, question_pred_focuses, question_entity_focuses 15 | = softmax.qa_api:model_align(cur_model, question, predicates, entities, 2) 16 | 17 | print(rankings) 18 | print(likelihoods) 19 | print(question_pred_focuses[1]) 20 | print(question_entity_focuses[1]) 21 | 22 | -------------------------------------------------------------------------------- /dmn/tests/read_from_shell_test.lua: -------------------------------------------------------------------------------- 1 | --Reads stuff from shell test 2 | 3 | require('../../dmn') 4 | 5 | -------------------------------------------------------------------------------- /dmn/tests/substring_test.lua: -------------------------------------------------------------------------------- 1 | require('.') 2 | hashed_items = {} 3 | items = "#" .. "foo-bar" .. "#" 4 | for i = 1, #items - 2 do 5 | table.insert(hashed_items, items:sub(i, i + 2)) 6 | end 7 | 8 | print (hashed_items) 9 | 10 | local dir = 'data/Translation/train/' 11 | local input_vocab = dmn.HashVocab(dir .. 
'input_vocab.txt', true) 12 | local items = input_vocab:index("cats") 13 | 14 | print(items) 15 | print("Done") -------------------------------------------------------------------------------- /dmn/tests/temporal_convolution_test.lua: -------------------------------------------------------------------------------- 1 | require('nn') 2 | 3 | print("==== Testing temporal convolutions ====") 4 | 5 | local input = torch.rand(5, 500) 6 | local conv_network = nn.TemporalConvolution(500, 100, 2, 1) 7 | local res = conv_network:forward(input) 8 | print(res) 9 | 10 | inp=5; -- dimensionality of one sequence element 11 | outp=1; -- number of derived features for one sequence element 12 | kw=1; -- kernel only operates on one sequence element per step 13 | dw=1; -- we step once and go on to the next sequence element 14 | 15 | mlp=nn.TemporalConvolution(inp,outp,kw,dw) 16 | 17 | x=torch.rand(7,inp) -- a sequence of 7 elements 18 | print(mlp:forward(x)) 19 | 20 | y=torch.rand(15,inp) -- a sequence of 15 elements 21 | print(mlp:forward(y)) 22 | 23 | print(mlp:forward(x) - mlp:forward(y)) 24 | 25 | -------------------------------------------------------------------------------- /dmn/tests/tensor_partition_test.lua: -------------------------------------------------------------------------------- 1 | require('..') 2 | 3 | local tensor = torch.rand(8, 2) 4 | 5 | local indices = {dmn.constants.TRAIN_INDEX, 6 | dmn.constants.TRAIN_INDEX, 7 | dmn.constants.VAL_INDEX, 8 | dmn.constants.TEST_INDEX, 9 | dmn.constants.TEST_INDEX, 10 | dmn.constants.TRAIN_INDEX, 11 | dmn.constants.VAL_INDEX, 12 | dmn.constants.TEST_INDEX} 13 | 14 | local train_tensor, val_tensor, test_tensor = 15 | dmn.functions.partition_tensor(tensor, indices) 16 | 17 | print(train_tensor) 18 | print(val_tensor) 19 | print(test_tensor) 20 | 21 | print(tensor) -------------------------------------------------------------------------------- /dmn/tests/vision_test.lua: 
-------------------------------------------------------------------------------- 1 | require('..') 2 | tmp = dmn.ImageEmbedModule{ 3 | num_classes = 1000, 4 | network_type = "resnet_152", 5 | shortcut_type = "C", 6 | gpu_mode = false, 7 | classify = false, 8 | load_weights = false 9 | } 10 | 11 | inputs = image.lena() 12 | results = tmp:forward(inputs) 13 | 14 | print(results) -------------------------------------------------------------------------------- /dmn/tests/vocab_hash_test.lua: -------------------------------------------------------------------------------- 1 | require('..') 2 | local vocab_path = 'data/Translation/train/input_vocab.txt' 3 | local hash_vocab = dmn.HashVocab(vocab_path, true) 4 | local input_layer = dmn.HashLayer{ 5 | emb_dim = 300, 6 | dropout_prob = 0.5, 7 | gpu_mode = false, 8 | dropout = false, 9 | vocab = hash_vocab 10 | } 11 | local inputs = {"La", "Foo", "bar"} 12 | local latent_semantic = input_layer:forward(inputs) 13 | print(latent_semantic) 14 | local errs = input_layer:backward(inputs, latent_semantic) 15 | print(errs) 16 | 17 | -------------------------------------------------------------------------------- /dmn/trained_models/Attention_Network_char_level_true_attention_type_coarse_fixed_35.th: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidgolub/SimpleQA/f072f87185c22a815c5abef3d0ff28eeac129119/dmn/trained_models/Attention_Network_char_level_true_attention_type_coarse_fixed_35.th -------------------------------------------------------------------------------- /dmn/util/constants.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Constants 3 | ]] 4 | 5 | local constants = torch.class('dmn.constants') 6 | 7 | constants.TRAIN_INDEX = 0 8 | constants.VAL_INDEX = 1 9 | constants.TEST_INDEX = 2 10 | constants.TRAIN_FRACTION = 0.6 11 | 12 | -- error constant 13 | constants.ERROR_CONSTANT = 'ERROR OCCURED' 14 | 15 
| -- number of retries for request 16 | constants.NUM_RETRIES = 5 17 | 18 | -- Constants for context image captioning 19 | constants.PER = 'PERSON' 20 | constants.ORG = 'ORGANIZATION' 21 | constants.MISC = 'MISCELLANEOUS' 22 | constants.LOC = 'LOCATION' 23 | constants.YEAR = 'TIMEOFYEAR' 24 | constants.MONTH = 'TIMEMONTH' 25 | 26 | constants.NO_CONTEXT = '' 27 | 28 | -- Authentication constants 29 | constants.USERNAME = 'foo' 30 | constants.PASSWORD = 'bar' 31 | 32 | -- Local constants 33 | constants.LOCAL_MODEL_DIR = 'models/' 34 | 35 | -- Cloud constants 36 | constants.CLOUD_MODEL_DIR = 'softmax_models' 37 | constants.CLOUD_LOG_DIR = 'softmax_logs' 38 | constants.CLOUD_PREDICTIONS_DIR = 'softmax_predictions' 39 | 40 | -- Directory constants 41 | constants.MAIN_PATH = '../' 42 | 43 | -- URL constants for logs 44 | constants.JOB_ENDPOINT = 'http://ec2-52-33-179-156.us-west-2.compute.amazonaws.com:8000/api/v1/' 45 | --'http://127.0.0.1:8000/api/v1/' 46 | --'http://127.0.0.1:8000/api/' 47 | -- For tokenizing 48 | constants.CHAR_LEVEL = true 49 | constants.WORD_LEVEL = false 50 | constants.NO_TOKENIZATION = 2 51 | 52 | -- For loading model 53 | constants.IMAGE_CLASSIFICATION_SHOP_APP_TAG_ID = 3609 54 | constants.IMAGE_CLASSIFICATION_SHOP_APP_COLOR_ID = 3544 55 | constants.CONTEXT_DESCRIBE_ID = 1 56 | constants.DMN_ID = 1 57 | constants.DSSM_ID = 1 58 | constants.CONTEXT_DSSM_ID = 1 59 | 60 | -- For classification 61 | constants.CLASSIFY_SINGLE_CLASS = 'CLASSIFY_SINGLE_CLASS' 62 | constants.CLASSIFY_MULTI_CLASS = 'CLASSIFY_MULTI_CLASS' 63 | constants.CLASSIFY_TRANSFER_LEARNING = 'CLASSIFY_TRANSFER_LEARNING' 64 | 65 | constants.RERANK_NCE_CRITERION = 'RERANK_NCE_CRITERION' 66 | constants.RERANK_SOFTMAX_CRITERION = 'RERANK_SOFTMAX_CRITERION' 67 | 68 | -- For dataset types 69 | constants.DATASET_VALIDATION_TYPE = 'DATASET_VALIDATION_TYPE' 70 | constants.DATASET_TRAINING_TYPE = 'DATASET_TRAINING_TYPE' 71 | constants.DATASET_TESTING_TYPE = 'DATASET_TESTING_TYPE' 72 | 73 | 
-- returns network from string representation, useful for loading models 74 | function constants.get_network(string_name) 75 | assert(string_name ~= nil, "Must specify name of network") 76 | local net 77 | if string_name == 'dmn.Attention_Network' then 78 | return dmn.Attention_Network 79 | elseif string_name == 'dmn.Captioner_Network' then 80 | return dmn.Captioner_Network 81 | elseif string_name == 'dmn.Context_Captioner_Network' then 82 | return dmn.Context_Captioner_Network 83 | elseif string_name == 'dmn.Context_DSSM_Network' then 84 | return dmn.Context_DSSM_Network 85 | elseif string_name == 'dmn.DMN_Network' then 86 | return dmn.DMN_Network 87 | elseif string_name == 'dmn.Image_Classification_Network' then 88 | return dmn.Image_Classification_Network 89 | elseif string_name == 'dmn.Image_Classification_Network' then 90 | return dmn.Image_Classification_Network 91 | else 92 | error("Invalid network type " .. string_name .. " specified") 93 | end 94 | end -------------------------------------------------------------------------------- /dmn/util/eval_functions.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Eval functions 3 | ]] 4 | 5 | local EvalFunctions = torch.class('dmn.eval_functions') 6 | 7 | -- Calculates BLEU scores on train, test and val sets 8 | function EvalFunctions.bleu(predictions_path, gold_path) 9 | assert(predictions_path ~= nil, "Must specify predictions path to use") 10 | assert(gold_path ~= nil, "Must specify gold path to use") 11 | 12 | dmn.logger:print("Evaluating bleu score") 13 | 14 | -- First makes sure that we have no error 15 | local predict_lines = datasets.read_line_data(predictions_path) 16 | local gold_lines = datasets.read_line_data(gold_path) 17 | 18 | -- check if gold lines has a \t character, in which case we want to split the gold captions for the bleu score testing 19 | 20 | local cleaned_pred_path = "../tmp/tmp_pred" .. sys.clock() .. 
".txt" 21 | local cleaned_gold_path = "../tmp/tmp_gold" .. sys.clock() .. ".txt" 22 | 23 | local cleaned_predict_lines = {} 24 | local cleaned_gold_lines = {} 25 | 26 | for i = 1, #predict_lines do 27 | if predict_lines[i] ~= dmn.constants.ERROR_CONSTANT then 28 | local lowered_prediction = predict_lines[i]:lower() 29 | local trimmed_prediction = dmn.functions.string_trim(lowered_prediction) 30 | table.insert(cleaned_predict_lines, trimmed_prediction) 31 | -- able to handle both "\t" and non-tabbed data 32 | local gold_captions = string.split(gold_lines[i], "\t") 33 | for i = 1, #gold_captions do 34 | if cleaned_gold_lines[i] == nil then 35 | cleaned_gold_lines[i] = {} 36 | end 37 | local lowered_gold_caption = gold_captions[i]:lower() 38 | local trimmed_gold_caption = dmn.functions.string_trim(lowered_gold_caption) 39 | table.insert(cleaned_gold_lines[i], trimmed_gold_caption) 40 | end 41 | end 42 | end 43 | 44 | local start_count = #cleaned_gold_lines[1] 45 | 46 | for i = 1, #cleaned_gold_lines do 47 | local cur_index = i - 1 48 | local cur_save_path = cleaned_gold_path .. cur_index 49 | local cur_lines = cleaned_gold_lines[i] 50 | 51 | -- make sure that we only save scores that have equal number of lines to the start (otherwise BLEU messes up) 52 | if #cur_lines == start_count then 53 | datasets.save_line_data(cur_lines, cur_save_path) 54 | end 55 | end 56 | datasets.save_line_data(cleaned_predict_lines, cleaned_pred_path) 57 | 58 | local results = io.popen("../dmn/eval/run_bleu.sh " .. cleaned_pred_path .. ' ' .. 
cleaned_gold_path) 59 | local bleu_scores = results:lines() 60 | 61 | return bleu_scores 62 | end -------------------------------------------------------------------------------- /dmn/util/io_functions.lua: -------------------------------------------------------------------------------- 1 | 2 | local functions = torch.class('dmn.io_functions') 3 | 4 | -- decodes json 5 | function functions.json_decode(html) 6 | assert(html ~= nil, "must specify html to decode") 7 | res = cjson.decode(html) 8 | return res 9 | end 10 | 11 | function functions.load_image(img_path, library_to_use) 12 | assert(img_path ~= nil, "Must specify image path to load from") 13 | local lib 14 | 15 | -- get library to use 16 | if library_to_use == nil then 17 | if python ~= nil then 18 | lib = 'python' 19 | else 20 | lib = 'image' 21 | end 22 | else 23 | lib = library_to_use 24 | end 25 | 26 | local new_img = nil 27 | local img 28 | --dmn.logger:print("Trying to load image from " .. img_path) 29 | -- Hacky error handling for image load issues 30 | functions.trycatch( 31 | function() 32 | if lib == 'gm' then 33 | img = gm.load(img_path, 'double') 34 | elseif lib == 'python' then 35 | img = dmn.image_functions.python_load_image(img_path) 36 | else 37 | local ok, input = pcall(function() 38 | img = image.load(img_path, 3, 'double') 39 | end) 40 | 41 | -- Sometimes image.load fails because the file extension does not match the 42 | -- image format. In that case, use image.decompress on a ByteTensor. 43 | if not ok then 44 | local f = io.open(img_path, 'r') 45 | assert(f, 'Error reading: ' .. 
tostring(img_path)) 46 | local data = f:read('*a') 47 | f:close() 48 | 49 | local b = torch.ByteTensor(string.len(data)) 50 | ffi.copy(b:data(), data, b:size(1)) 51 | 52 | img = image.decompress(b, 3, 'double') 53 | end 54 | end 55 | if img:size(1) == 1 then 56 | --dmn.logger:print("Converting grayscale image to color") 57 | new_img = torch.zeros(3, img:size(2), img:size(3)) 58 | new_img[1] = img[1] 59 | new_img[2] = img[1] 60 | new_img[3] = img[1] 61 | elseif img:size(1) == 3 then 62 | new_img = img 63 | else 64 | local cur_size = img:size(1) 65 | error("Only operating on color or greyscale images " .. cur_size) 66 | end 67 | --dmn.logger:print("Successfully loaded image from " .. img_path) 68 | end, 69 | function(err) 70 | dmn.logger:print("ERROR OCCURED LOADING IMAGE from " .. img_path .. " " .. err) 71 | new_img = nil 72 | end) 73 | return new_img 74 | end 75 | 76 | function functions.file_exists(file_path) 77 | assert(file_path ~= nil, "Must specify file path to check") 78 | 79 | -- file exists if it has attributes 80 | local file_exists = lfs.attributes(file_path) 81 | return file_exists 82 | end 83 | 84 | -- check if folder exists 85 | function functions.check_folder(base_dir) 86 | assert(base_dir ~= nil, "Must specify folder name to check") 87 | if lfs.attributes(base_dir) == nil then 88 | print("Directory not found, making new directory at " .. 
base_dir) 89 | lfs.mkdir(base_dir) 90 | end 91 | end 92 | 93 | -- executes command and returns all print lines 94 | function functions.execute_command(command) 95 | assert(command ~= nil, "Must specify command to execute") 96 | local handle = io.popen(command) 97 | local result = handle:lines() 98 | 99 | handle:close() 100 | return result 101 | end 102 | 103 | -- Creates a new post request to specified url, returns body result 104 | function functions.post_request(url, request_body) 105 | assert(url ~= nil, "Must specify url to send a POST request to") 106 | assert(request_body ~= nil, "Must specify body data to post") 107 | 108 | --dmn.logger:print("Sending POST request to " .. url 109 | -- .. " with data " .. tostring(request_body)) 110 | 111 | local response_body = { } 112 | local res, code, response_headers = http.request 113 | { 114 | url = url; 115 | method = "POST"; 116 | headers = 117 | { 118 | ["Content-Type"] = "application/json"; 119 | ["Content-Length"] = #request_body; 120 | }; 121 | source = ltn12.source.string(request_body); 122 | sink = ltn12.sink.table(response_body); 123 | } 124 | 125 | assert(response_body ~= nil, "Error sending job id") 126 | return response_body 127 | 128 | end 129 | 130 | function functions.url_encode(str) 131 | assert(str ~= nil, "Must specify string to encode") 132 | if (str) then 133 | str = string.gsub (str, "\n", "\r\n") 134 | str = string.gsub (str, "([^%w ])", 135 | function (c) return string.format ("%%%02X", string.byte(c)) end) 136 | str = string.gsub (str, " ", "+") 137 | end 138 | return str 139 | end 140 | 141 | function functions.trycatch(try,catch) 142 | local ok,err = pcall(try) 143 | if not ok then catch(err) end 144 | end 145 | 146 | -- returns a table of all files in a directory 147 | function functions.list_files(dir) 148 | local files = {} 149 | local p = io.popen('find "'..dir..'" -type f') --Open directory look for files, save data in p. By giving '-type f' as parameter, it returns all files. 
150 | for file in p:lines() do --Loop through all files 151 | table.insert(files, file) 152 | end 153 | return files 154 | end 155 | 156 | -- returns file name of an absolute path 157 | function functions.file_name(abs_path) 158 | assert(abs_path ~= nil, "Must specify absolute path to use") 159 | local dir_path, file_name, ext = 160 | string.match(abs_path, "(.-)([^\\/]-%.?([^%.\\/]*))$") 161 | return file_name 162 | end 163 | 164 | function functions.http_request(url) 165 | assert(url ~= nil, "Must specify url to get") 166 | local r, c, headers = http.request(url) 167 | return r, c, headers 168 | end 169 | 170 | function functions.https_request(url) 171 | assert(url ~= nil, "Must specify https url to explore") 172 | local resp = {} 173 | 174 | https.TIMEOUT = 10 175 | local r, c, headers, s = https.request{ 176 | url = url, 177 | sink = ltn12.sink.table(resp), 178 | protocol = "tlsv1" 179 | } 180 | 181 | local res = table.concat(resp) 182 | local trimmed_res = res:gsub("\\u","") 183 | local trimmed_res = trimmed_res:gsub("\\n", "") 184 | local trimmed_res = trimmed_res:gsub("@@", "") 185 | 186 | return res, r, headers 187 | end -------------------------------------------------------------------------------- /dmn/util/logger.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Logger that logs for all the other loggers 3 | ]] 4 | 5 | local Logger = torch.class('dmn.Logger') 6 | 7 | function Logger:__init(config) 8 | self.loggers = {} 9 | end 10 | 11 | function Logger:add_logger(logger) 12 | assert(logger ~= nil, "Must specify logger to add") 13 | table.insert(self.loggers, logger) 14 | end 15 | 16 | function Logger:log(data) 17 | for i = 1, #self.loggers do 18 | cur_logger = self.loggers[i] 19 | cur_logger:log(data) 20 | end 21 | end 22 | 23 | function Logger:print(data, ...) 24 | for i = 1, #self.loggers do 25 | cur_logger = self.loggers[i] 26 | cur_logger:print(data, ...) 
27 | end 28 | end 29 | 30 | function Logger:printf(data, ...) 31 | for i = 1, #self.loggers do 32 | cur_logger = self.loggers[i] 33 | cur_logger:printf(data, ...) 34 | end 35 | end 36 | 37 | function Logger:header(s) 38 | for i = 1, #self.loggers do 39 | cur_logger = self.loggers[i] 40 | cur_logger:header(s) 41 | end 42 | end -------------------------------------------------------------------------------- /dmn/util/print_logger.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Helper functions 3 | ]] 4 | 5 | local Logger = torch.class('dmn.PrintLogger') 6 | 7 | function Logger:__init(config) 8 | end 9 | 10 | function Logger:log(data) 11 | print(data) 12 | end 13 | 14 | function Logger:print(data, ...) 15 | print(data, ...) 16 | end 17 | 18 | function Logger:printf(data, ...) 19 | utils.printf(data, ...) 20 | end 21 | 22 | function Logger:header(s) 23 | print(string.rep('-', 80)) 24 | print(s) 25 | print(string.rep('-', 80)) 26 | end -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | Copyright 2017 David Golub 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 12 | -------------------------------------------------------------------------------- /servers/FreebaseWebServer/README.md: -------------------------------------------------------------------------------- 1 | Installation 2 | ------------ 3 | 4 | ``` 5 | git clone git@github.com:theodo/flask-boilerplate.git && cd flask-boilerplate 6 | docker-compose run --rm server pip install -r requirements.txt --user --upgrade 7 | docker-compose up -d server 8 | ``` 9 | 10 | Accessing containers 11 | -------------------- 12 | 13 | Require Docker >= 1.3 14 | 15 | ```shell 16 | # use 'docker ps' to see the list of your containers 17 | docker exec -it flaskboilerplate_db_1 psql -Upostgres 18 | docker exec -it flaskboilerplate_server_1 bash 19 | ``` 20 | 21 | Migration process 22 | ----------------- 23 | 24 | ```shell 25 | # Prior to the first migration 26 | docker-compose run --rm server python src/manage.py db init 27 | 28 | # Create a new version of the database 29 | docker-compose run --rm server python src/manage.py db migrate 30 | # check file + remove comment + improve file if needed 31 | sudo vim migration/versions/.py 32 | 33 | # Upgrade your database to the last version 34 | docker-compose run --rm server python 
src/manage.py db upgrade 35 | ``` 36 | 37 | Run tests 38 | --------- 39 | 40 | ```shell 41 | docker-compose run --rm server python -m unittest 42 | ``` 43 | 44 | Commands 45 | -------- 46 | 47 | ```shell 48 | # Screenshot of python vendors 49 | docker-compose run --rm server pip freeze > requirements.txt 50 | 51 | # Run a command in the server container: 52 | docker-compose run --rm server 53 | ``` 54 | -------------------------------------------------------------------------------- /servers/FreebaseWebServer/docker-compose.yml: -------------------------------------------------------------------------------- 1 | server: 2 | image: python:2.7 3 | working_dir: /mnt 4 | volumes: 5 | - .:/mnt 6 | links: 7 | - db 8 | ports: 9 | - '5000:5000' 10 | environment: 11 | PYTHONPATH: $PYTHONPATH:src 12 | PYTHONUSERBASE: /mnt/vendor 13 | IMPORT_PATH: /mnt/test/fixture 14 | command: python src/server.py 15 | 16 | db: 17 | image: postgres:9.4 18 | volumes: 19 | - shared/data:/var/lib/postgresql/data 20 | -------------------------------------------------------------------------------- /servers/FreebaseWebServer/requirements.txt: -------------------------------------------------------------------------------- 1 | alembic==0.7.4 2 | aniso8601==0.92 3 | Flask==0.10.1 4 | Flask-Cors==1.8.0 5 | flask-marshmallow==0.4.0 6 | Flask-Migrate==1.2.0 7 | Flask-RESTful==0.2.12 8 | Flask-Script==2.0.5 9 | Flask-SQLAlchemy==2.0 10 | itsdangerous==0.24 11 | Jinja2==2.7.3 12 | Mako==1.0.1 13 | MarkupSafe==0.23 14 | marshmallow==1.2.2 15 | mock==1.0.1 16 | psycopg2==2.5.4 17 | pytz==2014.10 18 | requests==2.3.0 19 | six==1.9.0 20 | SQLAlchemy==0.9.8 21 | Werkzeug==0.10.1 22 | elasticsearch -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/.DS_Store~668f33dd2e7a36f45dece9e86a3a8ea0fe95588f: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/davidgolub/SimpleQA/f072f87185c22a815c5abef3d0ff28eeac129119/servers/FreebaseWebServer/src/.DS_Store~668f33dd2e7a36f45dece9e86a3a8ea0fe95588f -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/.DS_Store~937255f17dd28d0aeb532c7c6b9a28604c71306c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidgolub/SimpleQA/f072f87185c22a815c5abef3d0ff28eeac129119/servers/FreebaseWebServer/src/.DS_Store~937255f17dd28d0aeb532c7c6b9a28604c71306c -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/.DS_Store~HEAD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidgolub/SimpleQA/f072f87185c22a815c5abef3d0ff28eeac129119/servers/FreebaseWebServer/src/.DS_Store~HEAD -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/.DS_Store~HEAD_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidgolub/SimpleQA/f072f87185c22a815c5abef3d0ff28eeac129119/servers/FreebaseWebServer/src/.DS_Store~HEAD_0 -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/client/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidgolub/SimpleQA/f072f87185c22a815c5abef3d0ff28eeac129119/servers/FreebaseWebServer/src/client/__init__.py -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/client/superhero.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a client of another fake API. 
3 | 4 | It is used in this boilerplate to show how to mock an external API and how to 5 | chaine errors between APIs. 6 | """ 7 | import requests 8 | 9 | import config 10 | 11 | def is_superhero(email): 12 | response = requests.get(config.SUPERHERO_API_URL + '/superhero/%s' % email) 13 | 14 | if response.status_code == 404: 15 | return False 16 | elif response.status_code == 200: 17 | return True 18 | else: 19 | raise Exception('An error occured in the superhero API') 20 | -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/config.py: -------------------------------------------------------------------------------- 1 | import os, logging 2 | from util.freebase_helper import FreebaseHelper 3 | 4 | DEBUG = True 5 | HOST = os.getenv('HOST', '0.0.0.0') 6 | PORT = int(os.getenv('PORT', '5000')) 7 | 8 | POSTGRES = { 9 | 'user': os.getenv('POSTGRES_USER', 'postgres'), 10 | 'pw': os.getenv('POSTGRES_PW', ''), 11 | 'host': os.getenv('POSTGRES_HOST', os.getenv('DB_PORT_5432_TCP_ADDR')), 12 | 'port': os.getenv('POSTGRES_PORT', os.getenv('DB_PORT_5432_TCP_PORT')), 13 | 'db': os.getenv('POSTGRES_DB', 'postgres'), 14 | } 15 | DB_URI = 'postgresql://%(user)s:%(pw)s@%(host)s:%(port)s/%(db)s' % POSTGRES 16 | logging.basicConfig( 17 | filename=os.getenv('SERVICE_LOG', 'server.log'), 18 | level=logging.DEBUG, 19 | format='%(levelname)s: %(asctime)s pid:%(process)s module:%(module)s %(message)s', 20 | datefmt='%d/%m/%y %H:%M:%S', 21 | ) 22 | 23 | FREEBASE_HELPER = FreebaseHelper( 24 | ip_addresses=FreebaseHelper.FREEBASE_IP, 25 | create_index=False, 26 | timeout=60 #['192.168.99.100:32769'], False 27 | ) 28 | 29 | FREEBASE_HELPER.set_index(FreebaseHelper.FREEBASE_2M) 30 | 31 | SUPERHERO_API_URL = os.getenv('HOST', '127.0.0.1:5001') 32 | -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/manage.py: 
-------------------------------------------------------------------------------- 1 | from flask import Flask 2 | from flask.ext.script import Manager 3 | from flask.ext.migrate import Migrate, MigrateCommand 4 | 5 | import config 6 | from model.abc import db 7 | 8 | server = Flask(__name__) 9 | server.debug = config.DEBUG 10 | server.config['SQLALCHEMY_DATABASE_URI'] = config.DB_URI 11 | db.init_app(server) 12 | 13 | migrate = Migrate(server, db) 14 | manager = Manager(server) 15 | manager.add_command('db', MigrateCommand) 16 | 17 | if __name__ == '__main__': 18 | manager.run() 19 | -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .user import User 2 | -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/model/abc.py: -------------------------------------------------------------------------------- 1 | """Define an Abstract Base Class (ABC) for models.""" 2 | import datetime 3 | from weakref import WeakValueDictionary 4 | from flask.ext.sqlalchemy import SQLAlchemy 5 | from sqlalchemy import inspect 6 | from sqlalchemy.orm import aliased 7 | 8 | 9 | db = SQLAlchemy() 10 | 11 | 12 | class MetaBaseModel(db.Model.__class__): 13 | """ Define a metaclass for the BaseModel to implement `__getitem__` for managing aliases """ 14 | 15 | def __init__(cls, *args): 16 | super().__init__(*args) 17 | cls.aliases = WeakValueDictionary() 18 | 19 | def __getitem__(cls, key): 20 | try: 21 | alias = cls.aliases[key] 22 | except KeyError: 23 | alias = aliased(cls) 24 | cls.aliases[key] = alias 25 | return alias 26 | 27 | 28 | class BaseModel(): 29 | """ Generalize __init__, __repr__ and to_json 30 | Based on the models columns """ 31 | 32 | print_filter = () 33 | def __repr__(self): 34 | """ Define a base way to print models 35 | Columns inside `print_filter` 
are excluded """ 36 | return '%s(%s)' % (self.__class__.__name__, { 37 | column: value 38 | for column, value in self._to_dict().items() 39 | if column not in self.print_filter 40 | }) 41 | 42 | to_json_filter = () 43 | @property 44 | def json(self): 45 | """ Define a base way to jsonify models 46 | Columns inside `to_json_filter` are excluded """ 47 | return { 48 | column: value if not isinstance(value, datetime.date) else value.isoformat() 49 | for column, value in self._to_dict().items() 50 | if column not in self.to_json_filter 51 | } 52 | 53 | def _to_dict(self): 54 | """ This would more or less be the same as a `to_json` 55 | But putting it in a "private" function 56 | Allows to_json to be overriden without impacting __repr__ 57 | Or the other way around 58 | And to add filter lists """ 59 | return { 60 | column.key: getattr(self, column.key) 61 | for column in inspect(self.__class__).attrs 62 | } 63 | -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/model/user.py: -------------------------------------------------------------------------------- 1 | from werkzeug.security import generate_password_hash, check_password_hash 2 | 3 | from .abc import db, BaseModel 4 | 5 | 6 | class User(db.Model, BaseModel): 7 | __tablename__ = 'auth_user' 8 | 9 | id = db.Column(db.Integer, primary_key=True) 10 | email = db.Column(db.String(120), unique=True) 11 | password = db.Column(db.String(120)) 12 | 13 | def __init__(self, email=None, password=None): 14 | if email: 15 | self.email = email.lower() 16 | if password: 17 | self.set_password(password) 18 | 19 | def set_password(self, password): 20 | self.password = generate_password_hash(password) 21 | 22 | def check_password(self, password): 23 | return check_password_hash(self.password, password) 24 | -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/resource/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidgolub/SimpleQA/f072f87185c22a815c5abef3d0ff28eeac129119/servers/FreebaseWebServer/src/resource/__init__.py -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/resource/freebase.py: -------------------------------------------------------------------------------- 1 | from flask import jsonify 2 | from flask.ext.restful import Resource 3 | from flask import request 4 | from flask_restful import reqparse 5 | 6 | import re 7 | from model.abc import db 8 | from model import User 9 | import config 10 | from util.freebase import FreebaseObject, FreebaseFact 11 | import util.tokenizer as tokenizer 12 | 13 | 14 | class FreebaseNameAPI(Resource): 15 | # Gets all of the names for specified query 16 | def get(self): 17 | # Get query and number of desired results 18 | #try: 19 | query = request.args.get('query') 20 | remove_stopwords = request.args.get('remove_stopwords') 21 | 22 | # Replace '\'' 23 | query = query.replace('\'', " ") 24 | query = tokenizer.replace_accents(query.encode('utf-8')) 25 | 26 | removed_stopwords_query = tokenizer.remove_stopwords(query) 27 | num_results = request.args.get('num_results') 28 | 29 | fb_helper = config.FREEBASE_HELPER 30 | 31 | names, num_items = fb_helper.get_names(removed_stopwords_query, num_results) 32 | jsoned_names = [{'freebase_name': name.freebase_name, 'freebase_id': name.freebase_id} for name in names] 33 | topic_ids = [name.freebase_id for name in names] 34 | topic_names = [name.freebase_name for name in names] 35 | 36 | # Only keep names that have an alias in the sentence 37 | cleaned_names = tokenizer.clean_name_arr(topic_names, query) 38 | removed_substring_names = tokenizer.remove_substrings_arr(cleaned_names) 39 | cleaned_jsoned_names = filter(lambda name: name['freebase_name'] \ 40 | in set(removed_substring_names), jsoned_names) 41 | return 
jsonify(result=cleaned_jsoned_names,cleaned_names=cleaned_names,raw_names=jsoned_names,num_items=num_items) 42 | #except Exception as e: 43 | # print("Error requesting %s" % e) 44 | # jsoned_names = [{'freebase_name': "NONE", 'freebase_id': '/m/test'}] 45 | # return jsonify(result=jsoned_names,num_items=num_items) 46 | 47 | class FreebaseFactAPI(Resource): 48 | # Gets all of the names for specified query 49 | def get(self): 50 | # Get topic_id and number of desired results 51 | topic_ids = request.args.get('topic_ids').split(',') 52 | num_results = request.args.get('num_results') 53 | num_results_per_topic = request.args.get("num_results_per_topic") 54 | if num_results_per_topic is None: 55 | num_results_per_topic = 10 56 | else: 57 | num_results_per_topic = int(num_results_per_topic) 58 | 59 | fb_helper = config.FREEBASE_HELPER 60 | facts, filtered_facts, name_fact_mapper, num_items = \ 61 | fb_helper.get_facts_by_ids(topic_ids, \ 62 | num_results=num_results, num_results_per_topic=num_results_per_topic) 63 | 64 | jsoned_facts = [{'src_freebase_name': fact.src.freebase_name, 65 | 'src_freebase_id': fact.src.freebase_id, 66 | 'pred_freebase_name': fact.pred.freebase_name, 67 | 'pred_freebase_id': fact.pred.freebase_id, 68 | 'tgt_freebase_name': fact.tgt.freebase_name, 69 | 'tgt_freebase_id': fact.tgt.freebase_id } for fact in facts] 70 | 71 | jsoned_filtered_facts = [{'src_freebase_name': fact.src.freebase_name, 72 | 'src_freebase_id': fact.src.freebase_id, 73 | 'pred_freebase_name': fact.pred.freebase_name, 74 | 'pred_freebase_id': fact.pred.freebase_id, 75 | 'tgt_freebase_name': fact.tgt.freebase_name, 76 | 'tgt_freebase_id': fact.tgt.freebase_id } for fact in filtered_facts] 77 | 78 | print(jsoned_filtered_facts) 79 | print(jsoned_facts) 80 | return jsonify(result=jsoned_filtered_facts, \ 81 | raw_facts = jsoned_facts, \ 82 | num_items=num_items, 83 | num_results_per_topic=num_results_per_topic) 84 | 
-------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/resource/user.py: -------------------------------------------------------------------------------- 1 | from flask import jsonify 2 | from flask.ext.restful import Resource 3 | 4 | from model.abc import db 5 | from model import User 6 | from client import superhero 7 | from util import parse_params 8 | 9 | 10 | class UserListAPI(Resource): 11 | def get(self): 12 | return jsonify(data=[user.json for user in User.query]) 13 | 14 | @parse_params( 15 | {'name': 'email', 'type': str, 'required': True}, 16 | {'name': 'password', 'type': str, 'required': True} 17 | ) 18 | def post(self, params): 19 | user = User(**params) 20 | db.session.add(user) 21 | db.session.commit() 22 | return user.json 23 | 24 | 25 | class UserAPI(Resource): 26 | def get(self, id): 27 | user = User.query.get(id) 28 | user_dict = user.json 29 | user_dict['is_superhero'] = superhero.is_superhero(user.email) 30 | return user_dict 31 | -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/route/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidgolub/SimpleQA/f072f87185c22a815c5abef3d0ff28eeac129119/servers/FreebaseWebServer/src/route/__init__.py -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/route/common.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint, jsonify, current_app 2 | from flask.ext.restful import Api 3 | 4 | 5 | common_blueprint = Blueprint('common', __name__) 6 | common_blueprint_api = Api(common_blueprint) 7 | 8 | 9 | @common_blueprint.route('/routes') 10 | def list_routes(): 11 | output = [] 12 | for rule in current_app.url_map.iter_rules(): 13 | methods = ','.join(rule.methods) 14 | line = "{:50s} 
{:20s}".format(str(rule), methods) 15 | output.append(line) 16 | return jsonify(routes=output) 17 | -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/route/freebase.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint 2 | from flask.ext.restful import Api 3 | 4 | 5 | freebase_blueprint = Blueprint('freebase', __name__) 6 | freebase_blueprint_api = Api(freebase_blueprint) 7 | 8 | 9 | from resource.freebase import FreebaseNameAPI, FreebaseFactAPI 10 | 11 | #, 'query', 'num_results' 12 | freebase_blueprint_api.add_resource(FreebaseNameAPI, '/api/v1/freebase/name') 13 | 14 | #, 'topic_ids', 'num_results' 15 | freebase_blueprint_api.add_resource(FreebaseFactAPI, '/api/v1/freebase/fact') 16 | -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/route/user.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint 2 | from flask.ext.restful import Api 3 | 4 | 5 | user_blueprint = Blueprint('user', __name__) 6 | user_blueprint_api = Api(user_blueprint) 7 | 8 | 9 | from resource.user import UserAPI, UserListAPI 10 | user_blueprint_api.add_resource(UserListAPI, '/user') 11 | user_blueprint_api.add_resource(UserAPI, '/user/') 12 | -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/server.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | from flask.ext.cors import CORS 3 | 4 | import config 5 | from model.abc import db 6 | 7 | server = Flask(__name__) 8 | server.debug = config.DEBUG 9 | 10 | server.config['SQLALCHEMY_DATABASE_URI'] = config.DB_URI 11 | db.init_app(server) 12 | db.app = server 13 | 14 | CORS( 15 | server, 16 | resources={r"/*": {"origins": "*"}}, 17 | headers=['Content-Type', 'X-Requested-With', 
'Authorization'] 18 | ) 19 | 20 | from route.common import common_blueprint 21 | server.register_blueprint(common_blueprint) 22 | 23 | from route.user import user_blueprint 24 | server.register_blueprint(user_blueprint) 25 | 26 | from route.freebase import freebase_blueprint 27 | server.register_blueprint(freebase_blueprint) 28 | 29 | if __name__ == '__main__': 30 | server.run(host=config.HOST, port=config.PORT) 31 | -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/util/__init__.py: -------------------------------------------------------------------------------- 1 | from .parse_params import parse_params 2 | from .freebase import FreebaseObject, FreebaseFact 3 | from .freebase_helper import FreebaseHelper -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/util/freebase.py: -------------------------------------------------------------------------------- 1 | class FreebaseObject(object): 2 | def __init__(self, cur_id, freebase_id, freebase_name, 3 | freebase_aliases = ['NOALIAS'], 4 | freebase_description = 'NODESCRIPTION'): 5 | 6 | self.freebase_aliases = freebase_aliases 7 | self.freebase_description = freebase_description 8 | self.freebase_name = freebase_name 9 | self.freebase_id = freebase_id 10 | self.id = cur_id 11 | 12 | class FreebaseFact(object): 13 | """ Creates a new freebase fact with specified src, predicate and target """ 14 | def __init__(self, src, pred, tgt): 15 | self.src = src 16 | self.pred = pred 17 | self.tgt = tgt 18 | 19 | -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/util/parse_params.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from flask.ext.restful import reqparse 3 | 4 | 5 | def parse_params(*arguments): 6 | def wrapper(func): 7 | @wraps(func) 8 | def decorated_function(*args, 
**kwargs): 9 | parser = reqparse.RequestParser() 10 | for argument in arguments: 11 | parser.add_argument(**argument) 12 | params = parser.parse_args() 13 | 14 | return func(*args, params=params, **kwargs) 15 | return decorated_function 16 | return wrapper 17 | -------------------------------------------------------------------------------- /servers/FreebaseWebServer/src/util/tokenizer.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from collections import Counter 3 | import unicodedata 4 | from unidecode import unidecode 5 | 6 | 7 | stopwords_list = ["a", "about", "above", "above", "does", "across", "after", "afterwards", "again", "against", "all", "almost", "alone", "along", "already", "also","although","always","am","among", "amongst", "amoungst", "amount", "an", "and", "another", "any","anyhow","anyone","anything","anyway", "anywhere", "are", "around", "as", "at", "back","be","became", "because","become","becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "bill", "both", "bottom","but", "by", "call", "can", "cannot", "cant", "co", "con", "could", "couldnt", "cry", "de", "describe", "detail", "do", "done", "down", "due", "during", "each", "eg", "eight", "either", "eleven","else", "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former", "formerly", "forty", "found", "four", "from", "front", "full", "further", "get", "give", "go", "had", "has", "hasnt", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "hundred", "ie", "if", "in", "inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter", "latterly", "least", "less", "ltd", "made", "many", "may", 
# Stopwords discarded by remove_stopwords(); kept as a list for backward
# compatibility with importers, with a precomputed set for O(1) lookups.
stopwords_list = [
    "a", "about", "above", "above", "does", "across", "after", "afterwards",
    "again", "against", "all", "almost", "alone", "along", "already", "also",
    "although", "always", "am", "among", "amongst", "amoungst", "amount", "an",
    "and", "another", "any", "anyhow", "anyone", "anything", "anyway",
    "anywhere", "are", "around", "as", "at", "back", "be", "became", "because",
    "become", "becomes", "becoming", "been", "before", "beforehand", "behind",
    "being", "below", "beside", "besides", "between", "beyond", "bill", "both",
    "bottom", "but", "by", "call", "can", "cannot", "cant", "co", "con",
    "could", "couldnt", "cry", "de", "describe", "detail", "do", "done",
    "down", "due", "during", "each", "eg", "eight", "either", "eleven",
    "else", "elsewhere", "empty", "enough", "etc", "even", "ever", "every",
    "everyone", "everything", "everywhere", "except", "few", "fifteen",
    "fify", "fill", "find", "fire", "first", "five", "for", "former",
    "formerly", "forty", "found", "four", "from", "front", "full", "further",
    "get", "give", "go", "had", "has", "hasnt", "have", "he", "hence", "her",
    "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself",
    "him", "himself", "his", "how", "however", "hundred", "ie", "if", "in",
    "inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep",
    "last", "latter", "latterly", "least", "less", "ltd", "made", "many",
    "may", "me", "meanwhile", "might", "mill", "mine", "more", "moreover",
    "most", "mostly", "move", "much", "must", "my", "myself", "name",
    "namely", "neither", "never", "nevertheless", "next", "nine", "no",
    "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere",
    "of", "off", "often", "on", "once", "one", "only", "onto", "or", "other",
    "others", "otherwise", "our", "ours", "ourselves", "over", "own", "part",
    "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem",
    "seemed", "seeming", "seems", "serious", "several", "she", "should",
    "show", "side", "since", "sincere", "six", "sixty", "so", "some",
    "somehow", "someone", "something", "sometime", "sometimes", "somewhere",
    "still", "such", "system", "take", "ten", "than", "that", "the", "their",
    "them", "themselves", "then", "thence", "there", "thereafter", "thereby",
    "therefore", "therein", "thereupon", "these", "they", "thickv", "thin",
    "third", "this", "those", "though", "three", "through", "throughout",
    "thru", "thus", "to", "together", "too", "top", "toward", "towards",
    "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us",
    "very", "via", "was", "we", "well", "were", "what", "whatever", "when",
    "whence", "whenever", "where", "whereafter", "whereas", "whereby",
    "wherein", "whereupon", "wherever", "whether", "which", "while",
    "whither", "who", "whoever", "whole", "whom", "whose", "why", "will",
    "with", "within", "without", "would", "yet", "you", "your", "yours",
    "yourself", "yourselves", "the"]
#stopwords_list = ["where", "what", "name", "a", "is", "of", "who", "why", "when", "was", "which", "what's"]

# Set view of the stopwords: membership tests are O(1) instead of an O(n)
# scan of the list for every token.
_stopwords_set = set(stopwords_list)

# Extra tokens stripped regardless of the stopword list.
verb_list = ["the", "does"]


def remove_stopwords(sentence):
    """Strip stopwords (and "'"/"?" punctuation) from *sentence*.

    Returns the cleaned, single-spaced string; if every token was a
    stopword, returns the punctuation-cleaned sentence unchanged so the
    caller never gets an empty query.
    """
    sentence = sentence.replace("?", "")
    sentence = sentence.replace("'", ' ')
    items = sentence.split(' ')
    # List comprehension instead of bare map(): identical on Python 2 and
    # still a concrete sequence on Python 3 (map() is lazy there).
    cleaned_tokens = [clean_token(item, _stopwords_set) for item in items]
    raw_tokens = ' '.join(cleaned_tokens).split()
    single_space_string = ' '.join(raw_tokens)

    if len(raw_tokens) < 1:
        # Everything was filtered out -- fall back to the cleaned input.
        return sentence
    else:
        return single_space_string


def clean_token(token, stopwords_list):
    """Return '' if *token* is a stopword, a bare possessive 's', or in
    verb_list; otherwise return the token unchanged."""
    if token.lower() in stopwords_list or token == "s" or token in verb_list:
        return ""
    else:
        return token


def replace_accents(token):
    """ Replaces accents with special value """
    # Python 2 callers pass byte strings; decode only when needed so the
    # function also accepts already-decoded text (Python 3 str would raise
    # AttributeError on .decode in the original).
    if isinstance(token, bytes):
        token = token.decode('utf-8')
    normalized_str = unidecode(token)
    return normalized_str


def clean_name_arr(name_arr, query):
    """ Only returns values from name_arr that are a substring of query
    (checked case-insensitively, with quote/question-mark variants)
    """
    correct_names = []

    # Trailing space forces whole-word matches at the end of the query.
    query = query + " "
    lowercase_query = query.lower()
    quote_removed_query = lowercase_query.replace('\\"', '')
    question_removed_query = lowercase_query.replace('?', '')
    quote_removed_question_query = lowercase_query.replace('"', '').replace('?', '')

    for k in name_arr:
        spaced_k = k.lower() + " "
        if spaced_k in lowercase_query or \
           spaced_k in quote_removed_query or \
           spaced_k in question_removed_query or \
           spaced_k in quote_removed_question_query:
            correct_names.append(k)

    return correct_names


def remove_substrings_arr(substring_arr):
    """ Remove any string in array that is a substring in another string
    """
    substring_set = set(substring_arr)
    # Explicit list so callers always receive a list: on Python 3 the
    # original filter(...) returned a lazy iterator.
    return [item for item in substring_arr
            if not is_substring(item, substring_set)]


def is_substring(string, string_set):
    """
    Returns true if string is a substring of any string in
    string_set that is not equal to string
    """
    # any() short-circuits on the first hit; the original
    # len(filter(...)) raised TypeError on Python 3 (filter has no len).
    return any(string in cur_string and string != cur_string
               for cur_string in string_set)


def clean_name_dict(name_dict, query):
    """ Only returns values from name_dict whose keys are a substring of query
    name_dict: maps names to ids, keys
    """
    correct_names = dict()

    lowercase_query = query.lower()
    # .items() works on both Python 2 and 3; iteritems() was Py2-only.
    for k, v in name_dict.items():
        if k.lower() in lowercase_query:
            correct_names[k] = v

    return correct_names
class TestUser(unittest.TestCase):
    """Integration tests for the /user route."""

    def setUp(self):
        # Fresh test client and schema plus one known user per test.
        server.config['TESTING'] = True
        self.client = server.test_client()

        db.create_all()

        self.user = User('joe@example.fr', 'super-secret-password')
        db.session.add(self.user)
        db.session.commit()

    def tearDown(self):
        # Drop everything so each test starts from a clean database.
        db.session.remove()
        db.drop_all()

    @patch('client.superhero.is_superhero')
    def test_get_user(self, is_superhero_mock):
        # The superhero lookup hits an external service; stub it out.
        is_superhero_mock.return_value = True

        resp = self.client.get('/user/%d' % self.user.id)
        self.assertEqual(resp.status_code, 200)

        payload = json.loads(resp.data.decode('utf-8'))
        self.assertEqual(payload['email'], 'joe@example.fr')
        self.assertEqual(payload['is_superhero'], True)


if __name__ == '__main__':
    unittest.main()