├── .gitignore
├── README.md
├── buildmodel.py
├── data
│   ├── README.md
│   ├── build_mscoco_data.py
│   └── download_and_preprocess_mscoco.sh
├── demo
│   ├── gdo.py
│   ├── static
│   │   ├── css
│   │   │   └── bootstrap-horizon.css
│   │   └── icons
│   │       ├── plus.png
│   │       └── spin.gif
│   └── templates
│       └── index.html
├── evaluate.py
├── inception_v3(for TF 0.10).py
├── model.py
├── run_inference.py
├── run_inference_demo.py
├── tensorlayer
│   ├── __init__.py
│   ├── activation.py
│   ├── cost.py
│   ├── files.py
│   ├── iterate.py
│   ├── layers.py
│   ├── nlp.py
│   ├── ops.py
│   ├── prepro.py
│   ├── rein.py
│   ├── utils.py
│   └── visualize.py
├── tensorlayer1.2.2
│   ├── __init__.py
│   ├── activation.py
│   ├── cost.py
│   ├── files.py
│   ├── iterate.py
│   ├── layers.py
│   ├── nlp.py
│   ├── ops.py
│   ├── prepro.py
│   ├── rein.py
│   ├── utils.py
│   └── visualize.py
└── train.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.DS_Store
 2 | .ckpt
 3 | .ckpt-*
 4 | .DS_Store
 5 | ._.DS_Store
 6 | data/mscoco/*
 7 | data/*ckpt
 8 | *ckpt
 9 | *ckpt*
10 | *pyc
11 | model
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Image Captioning
 2 | 
 3 | We reimplemented the complicated [Google's Image Captioning](https://github.com/tensorflow/models/tree/master/im2txt) model with simple [TensorLayer](https://github.com/zsdonghao/tensorlayer) APIs.
 4 | 
 5 | These scripts run under Python 2 or 3 and TensorFlow 0.10 or 0.11.
 6 | 
 7 | ### 1. Prepare MSCOCO data and Inception model
 8 | Before you run the scripts, follow Google's [setup guide](https://github.com/tensorflow/models/tree/master/im2txt) and set up the model, ckpt and data directories in *.py.
 9 | 
10 | - Create a ``data`` folder.
11 | - Download and preprocess the MSCOCO data: [click here](https://github.com/tensorflow/models/tree/master/research/im2txt)
12 | - Download the Inception_V3 checkpoint: [click here](https://github.com/tensorflow/models/tree/master/research/slim)
13 | 
14 | 
15 | ### 2. Train the model
16 | - via ``train.py``
17 | 
18 | ### 3. Evaluate the model
19 | - via ``evaluate.py``
20 | 
21 | ### 4. Generate captions for a given image and model
22 | - via ``run_inference.py``
23 | 
24 | ### 5. Evaluation
25 | - [tylin/coco-caption](https://github.com/tylin/coco-caption/blob/master/cocoEvalCapDemo.ipynb)
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
1 | ### Download and Preprocess the MSCOCO Data
2 | [click here](https://github.com/zsdonghao/models/tree/master/im2txt#prepare-the-training-data)
3 | #### Download the Inception_V3 Checkpoint
4 | [click here](https://github.com/zsdonghao/models/tree/master/im2txt#download-the-inception-v3-checkpoint)
--------------------------------------------------------------------------------
/data/build_mscoco_data.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Converts MSCOCO data to TFRecord file format with SequenceExample protos. 16 | 17 | The MSCOCO images are expected to reside in JPEG files located in the following 18 | directory structure: 19 | 20 | train_image_dir/COCO_train2014_000000000151.jpg 21 | train_image_dir/COCO_train2014_000000000260.jpg 22 | ... 23 | 24 | and 25 | 26 | val_image_dir/COCO_val2014_000000000042.jpg 27 | val_image_dir/COCO_val2014_000000000073.jpg 28 | ... 29 | 30 | The MSCOCO annotations JSON files are expected to reside in train_captions_file 31 | and val_captions_file respectively. 32 | 33 | This script converts the combined MSCOCO data into sharded data files consisting 34 | of 256, 4 and 8 TFRecord files, respectively: 35 | 36 | output_dir/train-00000-of-00256 37 | output_dir/train-00001-of-00256 38 | ... 39 | output_dir/train-00255-of-00256 40 | 41 | and 42 | 43 | output_dir/val-00000-of-00004 44 | ... 45 | output_dir/val-00003-of-00004 46 | 47 | and 48 | 49 | output_dir/test-00000-of-00008 50 | ... 51 | output_dir/test-00007-of-00008 52 | 53 | Each TFRecord file contains ~2300 records. Each record within the TFRecord file 54 | is a serialized SequenceExample proto consisting of precisely one image-caption 55 | pair. Note that each image has multiple captions (usually 5) and therefore each 56 | image is replicated multiple times in the TFRecord files. 57 | 58 | The SequenceExample proto contains the following fields: 59 | 60 | context: 61 | image/image_id: integer MSCOCO image identifier 62 | image/data: string containing JPEG encoded image in RGB colorspace 63 | 64 | feature_lists: 65 | image/caption: list of strings containing the (tokenized) caption words 66 | image/caption_ids: list of integer ids corresponding to the caption words 67 | 68 | The captions are tokenized using the NLTK (http://www.nltk.org/) word tokenizer. 69 | The vocabulary of word identifiers is constructed from the sorted list (by 70 | descending frequency) of word tokens in the training set. Only tokens appearing 71 | at least 4 times are considered; all other words get the "unknown" word id. 72 | 73 | NOTE: This script will consume around 100GB of disk space because each image 74 | in the MSCOCO dataset is replicated ~5 times (once per caption) in the output. 75 | This is done for two reasons: 76 | 1. In order to better shuffle the training data. 77 | 2. It makes it easier to perform asynchronous preprocessing of each image in 78 | TensorFlow. 79 | 80 | Running this script using 16 threads may take around 1 hour on a HP Z420. 
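
A typical invocation (the paths are illustrative; download_and_preprocess_mscoco.sh
below drives this script with the same flags):

  python build_mscoco_data.py \
    --train_image_dir="/tmp/train2014/" \
    --val_image_dir="/tmp/val2014" \
    --train_captions_file="/tmp/captions_train2014.json" \
    --val_captions_file="/tmp/captions_val2014.json" \
    --output_dir="/tmp/" \
    --word_counts_output_file="/tmp/word_counts.txt"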
81 | """ 82 | 83 | from __future__ import absolute_import 84 | from __future__ import division 85 | from __future__ import print_function 86 | 87 | from collections import Counter 88 | from collections import namedtuple 89 | from datetime import datetime 90 | import json 91 | import os.path 92 | import random 93 | import sys 94 | import threading 95 | 96 | 97 | 98 | import nltk.tokenize 99 | import numpy as np 100 | from six.moves import xrange 101 | import tensorflow as tf 102 | 103 | tf.flags.DEFINE_string("train_image_dir", "/tmp/train2014/", 104 | "Training image directory.") 105 | tf.flags.DEFINE_string("val_image_dir", "/tmp/val2014", 106 | "Validation image directory.") 107 | 108 | tf.flags.DEFINE_string("train_captions_file", "/tmp/captions_train2014.json", 109 | "Training captions JSON file.") 110 | tf.flags.DEFINE_string("val_captions_file", "/tmp/captions_val2014.json", 111 | "Validation captions JSON file.") 112 | 113 | tf.flags.DEFINE_string("output_dir", "/tmp/", "Output data directory.") 114 | 115 | tf.flags.DEFINE_integer("train_shards", 256, 116 | "Number of shards in training TFRecord files.") 117 | tf.flags.DEFINE_integer("val_shards", 4, 118 | "Number of shards in validation TFRecord files.") 119 | tf.flags.DEFINE_integer("test_shards", 8, 120 | "Number of shards in testing TFRecord files.") 121 | 122 | tf.flags.DEFINE_string("start_word", "", 123 | "Special word added to the beginning of each sentence.") 124 | tf.flags.DEFINE_string("end_word", "", 125 | "Special word added to the end of each sentence.") 126 | tf.flags.DEFINE_string("unknown_word", "", 127 | "Special word meaning 'unknown'.") 128 | tf.flags.DEFINE_integer("min_word_count", 4, 129 | "The minimum number of occurrences of each word in the " 130 | "training set for inclusion in the vocabulary.") 131 | tf.flags.DEFINE_string("word_counts_output_file", "/tmp/word_counts.txt", 132 | "Output vocabulary file of word counts.") 133 | 134 | tf.flags.DEFINE_integer("num_threads", 8, 135 | "Number of threads to preprocess the images.") 136 | 137 | FLAGS = tf.flags.FLAGS 138 | 139 | ImageMetadata = namedtuple("ImageMetadata", 140 | ["image_id", "filename", "captions"]) 141 | 142 | 143 | class Vocabulary(object): 144 | """Simple vocabulary wrapper.""" 145 | 146 | def __init__(self, vocab, unk_id): 147 | """Initializes the vocabulary. 148 | 149 | Args: 150 | vocab: A dictionary of word to word_id. 151 | unk_id: Id of the special 'unknown' word. 152 | """ 153 | self._vocab = vocab 154 | self._unk_id = unk_id 155 | 156 | def word_to_id(self, word): 157 | """Returns the integer id of a word string.""" 158 | if word in self._vocab: 159 | return self._vocab[word] 160 | else: 161 | return self._unk_id 162 | 163 | 164 | class ImageDecoder(object): 165 | """Helper class for decoding images in TensorFlow.""" 166 | 167 | def __init__(self): 168 | # Create a single TensorFlow Session for all image decoding calls. 169 | self._sess = tf.Session() 170 | 171 | # TensorFlow ops for JPEG decoding. 
172 | self._encoded_jpeg = tf.placeholder(dtype=tf.string) 173 | self._decode_jpeg = tf.image.decode_jpeg(self._encoded_jpeg, channels=3) 174 | 175 | def decode_jpeg(self, encoded_jpeg): 176 | image = self._sess.run(self._decode_jpeg, 177 | feed_dict={self._encoded_jpeg: encoded_jpeg}) 178 | assert len(image.shape) == 3 179 | assert image.shape[2] == 3 180 | return image 181 | 182 | 183 | def _int64_feature(value): 184 | """Wrapper for inserting an int64 Feature into a SequenceExample proto.""" 185 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 186 | 187 | 188 | def _bytes_feature(value): 189 | """Wrapper for inserting a bytes Feature into a SequenceExample proto.""" 190 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[str(value)])) 191 | 192 | 193 | def _int64_feature_list(values): 194 | """Wrapper for inserting an int64 FeatureList into a SequenceExample proto.""" 195 | return tf.train.FeatureList(feature=[_int64_feature(v) for v in values]) 196 | 197 | 198 | def _bytes_feature_list(values): 199 | """Wrapper for inserting a bytes FeatureList into a SequenceExample proto.""" 200 | return tf.train.FeatureList(feature=[_bytes_feature(v) for v in values]) 201 | 202 | 203 | def _to_sequence_example(image, decoder, vocab): 204 | """Builds a SequenceExample proto for an image-caption pair. 205 | 206 | Args: 207 | image: An ImageMetadata object. 208 | decoder: An ImageDecoder object. 209 | vocab: A Vocabulary object. 210 | 211 | Returns: 212 | A SequenceExample proto. 213 | """ 214 | with tf.gfile.FastGFile(image.filename, "r") as f: 215 | encoded_image = f.read() 216 | 217 | try: 218 | decoder.decode_jpeg(encoded_image) 219 | except (tf.errors.InvalidArgumentError, AssertionError): 220 | print("Skipping file with invalid JPEG data: %s" % image.filename) 221 | return 222 | 223 | context = tf.train.Features(feature={ 224 | "image/image_id": _int64_feature(image.image_id), 225 | "image/data": _bytes_feature(encoded_image), 226 | }) 227 | 228 | assert len(image.captions) == 1 229 | caption = image.captions[0] 230 | caption_ids = [vocab.word_to_id(word) for word in caption] 231 | feature_lists = tf.train.FeatureLists(feature_list={ 232 | "image/caption": _bytes_feature_list(caption), 233 | "image/caption_ids": _int64_feature_list(caption_ids) 234 | }) 235 | sequence_example = tf.train.SequenceExample( 236 | context=context, feature_lists=feature_lists) 237 | 238 | return sequence_example 239 | 240 | 241 | def _process_image_files(thread_index, ranges, name, images, decoder, vocab, 242 | num_shards): 243 | """Processes and saves a subset of images as TFRecord files in one thread. 244 | 245 | Args: 246 | thread_index: Integer thread identifier within [0, len(ranges)]. 247 | ranges: A list of pairs of integers specifying the ranges of the dataset to 248 | process in parallel. 249 | name: Unique identifier specifying the dataset. 250 | images: List of ImageMetadata. 251 | decoder: An ImageDecoder object. 252 | vocab: A Vocabulary object. 253 | num_shards: Integer number of shards for the output files. 254 | """ 255 | # Each thread produces N shards where N = num_shards / num_threads. For 256 | # instance, if num_shards = 128, and num_threads = 2, then the first thread 257 | # would produce shards [0, 64). 
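  # With the defaults above (train_shards = 256, num_threads = 8), each thread
  # therefore writes 256 / 8 = 32 shards.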
258 | num_threads = len(ranges) 259 | assert not num_shards % num_threads 260 | num_shards_per_batch = int(num_shards / num_threads) 261 | 262 | shard_ranges = np.linspace(ranges[thread_index][0], ranges[thread_index][1], 263 | num_shards_per_batch + 1).astype(int) 264 | num_images_in_thread = ranges[thread_index][1] - ranges[thread_index][0] 265 | 266 | counter = 0 267 | for s in xrange(num_shards_per_batch): 268 | # Generate a sharded version of the file name, e.g. 'train-00002-of-00010' 269 | shard = thread_index * num_shards_per_batch + s 270 | output_filename = "%s-%.5d-of-%.5d" % (name, shard, num_shards) 271 | output_file = os.path.join(FLAGS.output_dir, output_filename) 272 | writer = tf.python_io.TFRecordWriter(output_file) 273 | 274 | shard_counter = 0 275 | images_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int) 276 | for i in images_in_shard: 277 | image = images[i] 278 | 279 | sequence_example = _to_sequence_example(image, decoder, vocab) 280 | if sequence_example is not None: 281 | writer.write(sequence_example.SerializeToString()) 282 | shard_counter += 1 283 | counter += 1 284 | 285 | if not counter % 1000: 286 | print("%s [thread %d]: Processed %d of %d items in thread batch." % 287 | (datetime.now(), thread_index, counter, num_images_in_thread)) 288 | sys.stdout.flush() 289 | 290 | writer.close() 291 | print("%s [thread %d]: Wrote %d image-caption pairs to %s" % 292 | (datetime.now(), thread_index, shard_counter, output_file)) 293 | sys.stdout.flush() 294 | shard_counter = 0 295 | print("%s [thread %d]: Wrote %d image-caption pairs to %d shards." % 296 | (datetime.now(), thread_index, counter, num_shards_per_batch)) 297 | sys.stdout.flush() 298 | 299 | 300 | def _process_dataset(name, images, vocab, num_shards): 301 | """Processes a complete data set and saves it as a TFRecord. 302 | 303 | Args: 304 | name: Unique identifier specifying the dataset. 305 | images: List of ImageMetadata. 306 | vocab: A Vocabulary object. 307 | num_shards: Integer number of shards for the output files. 308 | """ 309 | # Break up each image into a separate entity for each caption. 310 | images = [ImageMetadata(image.image_id, image.filename, [caption]) 311 | for image in images for caption in image.captions] 312 | 313 | # Shuffle the ordering of images. Make the randomization repeatable. 314 | random.seed(12345) 315 | random.shuffle(images) 316 | 317 | # Break the images into num_threads batches. Batch i is defined as 318 | # images[ranges[i][0]:ranges[i][1]]. 319 | num_threads = min(num_shards, FLAGS.num_threads) 320 | spacing = np.linspace(0, len(images), num_threads + 1).astype(np.int) 321 | ranges = [] 322 | threads = [] 323 | for i in xrange(len(spacing) - 1): 324 | ranges.append([spacing[i], spacing[i + 1]]) 325 | 326 | # Create a mechanism for monitoring when all threads are finished. 327 | coord = tf.train.Coordinator() 328 | 329 | # Create a utility for decoding JPEG images to run sanity checks. 330 | decoder = ImageDecoder() 331 | 332 | # Launch a thread for each batch. 333 | print("Launching %d threads for spacings: %s" % (num_threads, ranges)) 334 | for thread_index in xrange(len(ranges)): 335 | args = (thread_index, ranges, name, images, decoder, vocab, num_shards) 336 | t = threading.Thread(target=_process_image_files, args=args) 337 | t.start() 338 | threads.append(t) 339 | 340 | # Wait for all the threads to terminate. 341 | coord.join(threads) 342 | print("%s: Finished processing all %d image-caption pairs in data set '%s'." 
% 343 |       (datetime.now(), len(images), name))
344 | 
345 | 
346 | def _create_vocab(captions):
347 |   """Creates the vocabulary of word to word_id.
348 | 
349 |   The vocabulary is saved to disk in a text file of word counts. The id of each
350 |   word in the file is its corresponding 0-based line number.
351 | 
352 |   Args:
353 |     captions: A list of lists of strings.
354 | 
355 |   Returns:
356 |     A Vocabulary object.
357 |   """
358 |   print("Creating vocabulary.")
359 |   counter = Counter()
360 |   for c in captions:
361 |     counter.update(c)
362 |   print("Total words:", len(counter))
363 | 
364 |   # Filter uncommon words and sort by descending count.
365 |   word_counts = [x for x in counter.items() if x[1] >= FLAGS.min_word_count]
366 |   word_counts.sort(key=lambda x: x[1], reverse=True)
367 |   print("Words in vocabulary:", len(word_counts))
368 | 
369 |   # Write out the word counts file.
370 |   with tf.gfile.FastGFile(FLAGS.word_counts_output_file, "w") as f:
371 |     f.write("\n".join(["%s %d" % (w, c) for w, c in word_counts]))
372 |   print("Wrote vocabulary file:", FLAGS.word_counts_output_file)
373 | 
374 |   # Create the vocabulary dictionary.
375 |   reverse_vocab = [x[0] for x in word_counts]
376 |   unk_id = len(reverse_vocab)
377 |   vocab_dict = dict([(x, y) for (y, x) in enumerate(reverse_vocab)])
378 |   vocab = Vocabulary(vocab_dict, unk_id)
379 | 
380 |   return vocab
381 | 
382 | 
383 | def _process_caption(caption):
384 |   """Processes a caption string into a list of tokenized words.
385 | 
386 |   Args:
387 |     caption: A string caption.
388 | 
389 |   Returns:
390 |     A list of strings; the tokenized caption.
391 |   """
392 |   tokenized_caption = [FLAGS.start_word]
393 |   tokenized_caption.extend(nltk.tokenize.word_tokenize(caption.lower()))
394 |   tokenized_caption.append(FLAGS.end_word)
395 |   return tokenized_caption
396 | 
397 | 
398 | def _load_and_process_metadata(captions_file, image_dir):
399 |   """Loads image metadata from a JSON file and processes the captions.
400 | 
401 |   Args:
402 |     captions_file: JSON file containing caption annotations.
403 |     image_dir: Directory containing the image files.
404 | 
405 |   Returns:
406 |     A list of ImageMetadata.
407 |   """
408 |   with tf.gfile.FastGFile(captions_file, "r") as f:
409 |     caption_data = json.load(f)
410 | 
411 |   # Extract the filenames.
412 |   id_to_filename = [(x["id"], x["file_name"]) for x in caption_data["images"]]
413 | 
414 |   # Extract the captions. Each image_id is associated with multiple captions.
415 |   id_to_captions = {}
416 |   for annotation in caption_data["annotations"]:
417 |     image_id = annotation["image_id"]
418 |     caption = annotation["caption"]
419 |     id_to_captions.setdefault(image_id, [])
420 |     id_to_captions[image_id].append(caption)
421 | 
422 |   assert len(id_to_filename) == len(id_to_captions)
423 |   assert set([x[0] for x in id_to_filename]) == set(id_to_captions.keys())
424 |   print("Loaded caption metadata for %d images from %s" %
425 |         (len(id_to_filename), captions_file))
426 | 
427 |   # Process the captions and combine the data into a list of ImageMetadata.
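  # (Each ImageMetadata built here keeps all of an image's captions;
  # _process_dataset() defined above later splits them into one caption per
  # example for the TFRecord output.)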
428 | print("Processing captions.") 429 | image_metadata = [] 430 | num_captions = 0 431 | for image_id, base_filename in id_to_filename: 432 | filename = os.path.join(image_dir, base_filename) 433 | captions = [_process_caption(c) for c in id_to_captions[image_id]] 434 | image_metadata.append(ImageMetadata(image_id, filename, captions)) 435 | num_captions += len(captions) 436 | print("Finished processing %d captions for %d images in %s" % 437 | (num_captions, len(id_to_filename), captions_file)) 438 | 439 | return image_metadata 440 | 441 | 442 | def main(unused_argv): 443 | def _is_valid_num_shards(num_shards): 444 | """Returns True if num_shards is compatible with FLAGS.num_threads.""" 445 | return num_shards < FLAGS.num_threads or not num_shards % FLAGS.num_threads 446 | 447 | assert _is_valid_num_shards(FLAGS.train_shards), ( 448 | "Please make the FLAGS.num_threads commensurate with FLAGS.train_shards") 449 | assert _is_valid_num_shards(FLAGS.val_shards), ( 450 | "Please make the FLAGS.num_threads commensurate with FLAGS.val_shards") 451 | assert _is_valid_num_shards(FLAGS.test_shards), ( 452 | "Please make the FLAGS.num_threads commensurate with FLAGS.test_shards") 453 | 454 | if not tf.gfile.IsDirectory(FLAGS.output_dir): 455 | tf.gfile.MakeDirs(FLAGS.output_dir) 456 | 457 | # Load image metadata from caption files. 458 | mscoco_train_dataset = _load_and_process_metadata(FLAGS.train_captions_file, 459 | FLAGS.train_image_dir) 460 | mscoco_val_dataset = _load_and_process_metadata(FLAGS.val_captions_file, 461 | FLAGS.val_image_dir) 462 | 463 | # Redistribute the MSCOCO data as follows: 464 | # train_dataset = 100% of mscoco_train_dataset + 85% of mscoco_val_dataset. 465 | # val_dataset = 5% of mscoco_val_dataset (for validation during training). 466 | # test_dataset = 10% of mscoco_val_dataset (for final evaluation). 467 | train_cutoff = int(0.85 * len(mscoco_val_dataset)) 468 | val_cutoff = int(0.90 * len(mscoco_val_dataset)) 469 | train_dataset = mscoco_train_dataset + mscoco_val_dataset[0:train_cutoff] 470 | val_dataset = mscoco_val_dataset[train_cutoff:val_cutoff] 471 | test_dataset = mscoco_val_dataset[val_cutoff:] 472 | 473 | # Create vocabulary from the training captions. 474 | train_captions = [c for image in train_dataset for c in image.captions] 475 | vocab = _create_vocab(train_captions) 476 | 477 | _process_dataset("train", train_dataset, vocab, FLAGS.train_shards) 478 | _process_dataset("val", val_dataset, vocab, FLAGS.val_shards) 479 | _process_dataset("test", test_dataset, vocab, FLAGS.test_shards) 480 | 481 | 482 | if __name__ == "__main__": 483 | tf.app.run() 484 | -------------------------------------------------------------------------------- /data/download_and_preprocess_mscoco.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ==============================================================================
16 | 
17 | # Script to download and preprocess the MSCOCO data set.
18 | #
19 | # The outputs of this script are sharded TFRecord files containing serialized
20 | # SequenceExample protocol buffers. See build_mscoco_data.py for details of how
21 | # the SequenceExample protocol buffers are constructed.
22 | #
23 | # usage:
24 | #  ./download_and_preprocess_mscoco.sh
25 | set -e
26 | 
27 | if [ -z "$1" ]; then
28 |   echo "usage: download_and_preprocess_mscoco.sh [data dir]"
29 |   exit
30 | fi
31 | 
32 | if [ "$(uname)" == "Darwin" ]; then
33 |   UNZIP="tar -xf"
34 | else
35 |   UNZIP="unzip -nq"
36 | fi
37 | 
38 | # Create the output directories.
39 | OUTPUT_DIR="${1%/}"
40 | SCRATCH_DIR="${OUTPUT_DIR}/raw-data"
41 | mkdir -p "${OUTPUT_DIR}"
42 | mkdir -p "${SCRATCH_DIR}"
43 | CURRENT_DIR=$(pwd)
44 | WORK_DIR="$0.runfiles/im2txt/im2txt"
45 | 
46 | # Helper function to download and unpack a .zip file.
47 | function download_and_unzip() {
48 |   local BASE_URL=${1}
49 |   local FILENAME=${2}
50 | 
51 |   if [ ! -f ${FILENAME} ]; then
52 |     echo "Downloading ${FILENAME} to $(pwd)"
53 |     wget -nd -c "${BASE_URL}/${FILENAME}"
54 |   else
55 |     echo "Skipping download of ${FILENAME}"
56 |   fi
57 |   echo "Unzipping ${FILENAME}"
58 |   ${UNZIP} ${FILENAME}
59 | }
60 | 
61 | cd ${SCRATCH_DIR}
62 | 
63 | # Download the images.
64 | BASE_IMAGE_URL="http://msvocds.blob.core.windows.net/coco2014"
65 | 
66 | TRAIN_IMAGE_FILE="train2014.zip"
67 | download_and_unzip ${BASE_IMAGE_URL} ${TRAIN_IMAGE_FILE}
68 | TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2014"
69 | 
70 | VAL_IMAGE_FILE="val2014.zip"
71 | download_and_unzip ${BASE_IMAGE_URL} ${VAL_IMAGE_FILE}
72 | VAL_IMAGE_DIR="${SCRATCH_DIR}/val2014"
73 | 
74 | # Download the captions.
75 | BASE_CAPTIONS_URL="http://msvocds.blob.core.windows.net/annotations-1-0-3"
76 | CAPTIONS_FILE="captions_train-val2014.zip"
77 | download_and_unzip ${BASE_CAPTIONS_URL} ${CAPTIONS_FILE}
78 | TRAIN_CAPTIONS_FILE="${SCRATCH_DIR}/annotations/captions_train2014.json"
79 | VAL_CAPTIONS_FILE="${SCRATCH_DIR}/annotations/captions_val2014.json"
80 | 
81 | # Build TFRecords of the image data.
82 | cd "${CURRENT_DIR}"
83 | BUILD_SCRIPT="${WORK_DIR}/build_mscoco_data"
84 | "${BUILD_SCRIPT}" \
85 |   --train_image_dir="${TRAIN_IMAGE_DIR}" \
86 |   --val_image_dir="${VAL_IMAGE_DIR}" \
87 |   --train_captions_file="${TRAIN_CAPTIONS_FILE}" \
88 |   --val_captions_file="${VAL_CAPTIONS_FILE}" \
89 |   --output_dir="${OUTPUT_DIR}" \
90 |   --word_counts_output_file="${OUTPUT_DIR}/word_counts.txt" \
91 | 
--------------------------------------------------------------------------------
/demo/gdo.py:
--------------------------------------------------------------------------------
 1 | import os, subprocess, random, urllib, uuid, urlparse
 2 | from PIL import Image
 3 | from flask import Flask, request, render_template, jsonify
 4 | app = Flask(__name__)
 5 | app.config['TEMPLATES_AUTO_RELOAD'] = True
 6 | 
 7 | 
 8 | """
 9 | 1. Install Flask: sudo pip install Flask
10 | 2. Curl: curl http://127.0.0.1:5000/ to read
11 | 3.
sudo apt-get install ufw 12 | sudo ufw allow 5000 13 | 14 | http://sudodev.cn/flask-external-access/ 15 | 16 | """ 17 | 18 | # print(os.path.exists("run_inference_demo.py")) 19 | # exit() 20 | 21 | @app.route("/") 22 | def main(): 23 | os.putenv("CUDA_VISIBLE_DEVICES", "") 24 | return render_template('index.html') 25 | 26 | 27 | @app.route("/getImages") 28 | def get_image_path(): 29 | # images = random.sample(os.listdir("/home/dsigpu4/Samba/im2txt/im2txt/data/mscoco/raw-data/val2014/"), 10) 30 | images = random.sample(os.listdir("/home/haodong/Workspace/image_captioning/data/mscoco/raw-data/val2014/"), 10) 31 | return jsonify(images) 32 | 33 | 34 | @app.route("/getCaption") 35 | def get_caption(): 36 | # checkpoint_dir = "--checkpoint_path=/home/dsigpu4/Samba/im2txt/model/train" 37 | # vocab_file = "--vocab_file=/home/dsigpu4/Samba/im2txt/im2txt/data/mscoco/word_counts.txt" 38 | checkpoint_dir = "--checkpoint_path=/home/haodong/Workspace/image_captioning/model/train" 39 | vocab_file = "--vocab_file=/home/haodong/Workspace/image_captioning/data/mscoco/word_counts.txt" 40 | 41 | image_name = request.args.get('imageName') 42 | # image_file = "--input_files=/home/dsigpu4/Samba/im2txt/im2txt/data/mscoco/raw-data/val2014/%s" % (image_name) 43 | image_file = "--input_files=/home/haodong/Workspace/image_captioning/data/mscoco/raw-data/val2014/%s" % (image_name) 44 | param_str = "--checkpoint_path=%s --vocab_file=%s --input_files=%s" % (checkpoint_dir, vocab_file, image_file) 45 | 46 | # p = subprocess.Popen(["../bazel-bin/im2txt/run_inference", checkpoint_dir, vocab_file, image_file], stdout = subprocess.PIPE) 47 | p = subprocess.Popen(["../env2_2/bin/python", "/home/haodong/Workspace/image_captioning/run_inference_demo.py", checkpoint_dir, vocab_file, image_file], stdout = subprocess.PIPE) 48 | 49 | out = p.communicate() 50 | return out 51 | 52 | 53 | @app.route("/getCaptionUrl") 54 | def get_caption_url(): 55 | image_url = request.args.get('imageUrl') 56 | image_name = str(uuid.uuid4()) 57 | 58 | file_ext = os.path.splitext(os.path.basename(urlparse.urlsplit(image_url).path))[1] 59 | urllib.urlretrieve(image_url, "static/uploads/%s%s" % (image_name, file_ext)) 60 | 61 | # if file_ext != ".jpg": 62 | im = Image.open("static/uploads/%s%s" % (image_name, file_ext)) 63 | im.save("static/uploads/%s.jpg" % (image_name), "JPEG") 64 | 65 | # checkpoint_dir = "--checkpoint_path=/home/dsigpu4/Samba/im2txt/model/train" 66 | # vocab_file = "--vocab_file=/home/dsigpu4/Samba/im2txt/im2txt/data/mscoco/word_counts.txt" 67 | 68 | checkpoint_dir = "--checkpoint_path=/home/haodong/Workspace/image_captioning/model/train" 69 | vocab_file = "--vocab_file=/home/haodong/Workspace/image_captioning/data/mscoco/word_counts.txt" 70 | 71 | image_file = "--input_files=static/uploads/%s.jpg" % (image_name) 72 | param_str = "--checkpoint_path=%s --vocab_file=%s --input_files=%s" % (checkpoint_dir, vocab_file, image_file) 73 | 74 | # p = subprocess.Popen(["../bazel-bin/im2txt/run_inference", checkpoint_dir, vocab_file, image_file], stdout = subprocess.PIPE) 75 | p = subprocess.Popen(["../env2_2/bin/python", "/home/haodong/Workspace/image_captioning/run_inference_demo.py", checkpoint_dir, vocab_file, image_file], stdout = subprocess.PIPE) 76 | 77 | out = p.communicate() 78 | return out 79 | 80 | 81 | if __name__ == "__main__": 82 | # app.run(port='5001') # internal 83 | app.run(host='0.0.0.0', port='5002', threaded=True) # external 84 | -------------------------------------------------------------------------------- 
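For reference, once the demo server above is running, its endpoints can be
exercised without the browser UI; a minimal sketch (Python 2, matching the
demo's own imports; host and port assume the app.run() defaults above):

    import json, urllib2

    base = "http://127.0.0.1:5002"
    # /getImages returns ten random val2014 filenames as a JSON list.
    images = json.loads(urllib2.urlopen(base + "/getImages").read())
    # /getCaption shells out to run_inference_demo.py and returns its stdout.
    print(urllib2.urlopen(base + "/getCaption?imageName=" + images[0]).read())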
/demo/static/css/bootstrap-horizon.css: -------------------------------------------------------------------------------- 1 | .row-horizon { 2 | overflow-x: scroll; 3 | overflow-y: hidden; 4 | white-space: nowrap; 5 | } 6 | .row-horizon > [class*="col-lg"], .row-horizon > [class*="col-md"], .row-horizon > [class*="col-sm"], .row-horizon > [class*="col-xs"] { 7 | float: none; 8 | display: inline-block; 9 | white-space: normal; 10 | vertical-align: top; 11 | } 12 | .row-horizon > .col-xs-12 { 13 | width: 90%; 14 | } 15 | .row-horizon > .col-xs-11 { 16 | width: 82.5%; 17 | } 18 | .row-horizon > .col-xs-10 { 19 | width: 75%; 20 | } 21 | .row-horizon > .col-xs-9 { 22 | width: 67.5%; 23 | } 24 | .row-horizon > .col-xs-8 { 25 | width: 60%; 26 | } 27 | .row-horizon > .col-xs-7 { 28 | width: 52.5%; 29 | } 30 | .row-horizon > .col-xs-6 { 31 | width: 45%; 32 | } 33 | .row-horizon > .col-xs-5 { 34 | width: 37.5%; 35 | } 36 | .row-horizon > .col-xs-4 { 37 | width: 30%; 38 | } 39 | .row-horizon > .col-xs-3 { 40 | width: 22.5%; 41 | } 42 | .row-horizon > .col-xs-2 { 43 | width: 15%; 44 | } 45 | .row-horizon > .col-xs-1 { 46 | width: 7.5%; 47 | } 48 | @media (min-width: 768px) { 49 | .row-horizon > .col-sm-12 { 50 | width: 90%; 51 | } 52 | .row-horizon > .col-sm-11 { 53 | width: 82.5%; 54 | } 55 | .row-horizon > .col-sm-10 { 56 | width: 75%; 57 | } 58 | .row-horizon > .col-sm-9 { 59 | width: 67.5%; 60 | } 61 | .row-horizon > .col-sm-8 { 62 | width: 60%; 63 | } 64 | .row-horizon > .col-sm-7 { 65 | width: 52.5%; 66 | } 67 | .row-horizon > .col-sm-6 { 68 | width: 45%; 69 | } 70 | .row-horizon > .col-sm-5 { 71 | width: 37.5%; 72 | } 73 | .row-horizon > .col-sm-4 { 74 | width: 30%; 75 | } 76 | .row-horizon > .col-sm-3 { 77 | width: 22.5%; 78 | } 79 | .row-horizon > .col-sm-2 { 80 | width: 15%; 81 | } 82 | .row-horizon > .col-sm-1 { 83 | width: 7.5%; 84 | } 85 | } 86 | @media (min-width: 992px) { 87 | .row-horizon > .col-md-12 { 88 | width: 90%; 89 | } 90 | .row-horizon > .col-md-11 { 91 | width: 82.5%; 92 | } 93 | .row-horizon > .col-md-10 { 94 | width: 75%; 95 | } 96 | .row-horizon > .col-md-9 { 97 | width: 67.5%; 98 | } 99 | .row-horizon > .col-md-8 { 100 | width: 60%; 101 | } 102 | .row-horizon > .col-md-7 { 103 | width: 52.5%; 104 | } 105 | .row-horizon > .col-md-6 { 106 | width: 45%; 107 | } 108 | .row-horizon > .col-md-5 { 109 | width: 37.5%; 110 | } 111 | .row-horizon > .col-md-4 { 112 | width: 30%; 113 | } 114 | .row-horizon > .col-md-3 { 115 | width: 22.5%; 116 | } 117 | .row-horizon > .col-md-2 { 118 | width: 15%; 119 | } 120 | .row-horizon > .col-md-1 { 121 | width: 7.5%; 122 | } 123 | } 124 | @media (min-width: 1200px) { 125 | .row-horizon > .col-lg-12 { 126 | width: 90%; 127 | } 128 | .row-horizon > .col-lg-11 { 129 | width: 82.5%; 130 | } 131 | .row-horizon > .col-lg-10 { 132 | width: 75%; 133 | } 134 | .row-horizon > .col-lg-9 { 135 | width: 67.5%; 136 | } 137 | .row-horizon > .col-lg-8 { 138 | width: 60%; 139 | } 140 | .row-horizon > .col-lg-7 { 141 | width: 52.5%; 142 | } 143 | .row-horizon > .col-lg-6 { 144 | width: 45%; 145 | } 146 | .row-horizon > .col-lg-5 { 147 | width: 37.5%; 148 | } 149 | .row-horizon > .col-lg-4 { 150 | width: 30%; 151 | } 152 | .row-horizon > .col-lg-3 { 153 | width: 22.5%; 154 | } 155 | .row-horizon > .col-lg-2 { 156 | width: 15%; 157 | } 158 | .row-horizon > .col-lg-1 { 159 | width: 7.5%; 160 | } 161 | } -------------------------------------------------------------------------------- /demo/static/icons/plus.png: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zsdonghao/Image-Captioning/696ab29fecd4e85c6b01bf1196a5d4692f2d92d2/demo/static/icons/plus.png
--------------------------------------------------------------------------------
/demo/static/icons/spin.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zsdonghao/Image-Captioning/696ab29fecd4e85c6b01bf1196a5d4692f2d92d2/demo/static/icons/spin.gif
--------------------------------------------------------------------------------
/demo/templates/index.html:
--------------------------------------------------------------------------------
[The HTML markup of this template did not survive extraction; only its visible
text remains. The page is titled "Caption Intelligence Demo", carries the
tagline "which uses backend image captioning scripts to generate captions",
and its scripts call the /getImages, /getCaption and /getCaptionUrl endpoints
served by demo/gdo.py.]
--------------------------------------------------------------------------------
/evaluate.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/python
  2 | # -*- coding: utf8 -*-
  3 | 
  4 | 
  5 | 
  6 | """Evaluate the image captioning model.
  7 | 
  8 | This script should be run concurrently with training so that summaries show up
  9 | in TensorBoard.
 10 | """
 11 | 
 12 | 
 13 | from __future__ import absolute_import
 14 | from __future__ import division
 15 | from __future__ import print_function
 16 | 
 17 | import math
 18 | import os.path
 19 | import time
 20 | from six.moves import xrange
 21 | import numpy as np
 22 | import tensorflow as tf
 23 | import tensorlayer as tl
 24 | from buildmodel import *
 25 | 
 26 | 
 27 | DIR = "/home/haodong/Workspace/image_captioning"
 28 | # MSCOCO_DIR = DIR + "/data/mscoco"
 29 | MSCOCO_DIR = "/home/haodong/Workspace/image_captioning/data/mscoco"
 30 | MODEL_DIR = DIR + "/model"
 31 | # Disable GPU
 32 | #   export CUDA_VISIBLE_DEVICES=""
 33 | # Enable 1 GPU
 34 | #   export CUDA_VISIBLE_DEVICES=1
 35 | # File pattern of sharded TFRecord input files.
 36 | input_file_pattern = MSCOCO_DIR + "/val-?????-of-00004"
 37 | # Directory containing model checkpoints.
 38 | checkpoint_dir = MODEL_DIR + "/train"
 39 | # Directory to write event logs.
 40 | eval_dir = MODEL_DIR + "/eval"
 41 | # Interval between evaluation runs, in seconds.
 42 | eval_interval_secs = 600
 43 | # Number of examples for evaluation.
 44 | num_eval_examples = 10132
 45 | # Minimum global step to run evaluation.
 46 | min_global_step = 5000
 47 | 
 48 | # Whether to train inception submodel variables. If True: fine-tune the Inception v3 model.
 49 | train_inception = False
 50 | 
 51 | mode = "eval"
 52 | assert mode in ["train", "eval", "inference"]
 53 | 
 54 | tf.logging.set_verbosity(tf.logging.INFO)  # Enable tf.logging
 55 | 
 56 | 
 57 | def evaluate_model(sess, target_cross_entropy_losses, target_cross_entropy_loss_weights, global_step, summary_writer, summary_op):
 58 |   """Computes perplexity-per-word over the evaluation dataset.
 59 | 
 60 |   Summaries and perplexity-per-word are written out to the eval directory.
 61 | 
 62 |   Args:
 63 |     sess: Session object.
 64 |     target_cross_entropy_losses, target_cross_entropy_loss_weights: loss tensors from Build_Model.
 65 |     global_step: Integer; global step of the model checkpoint.
 66 |     summary_writer: Instance of SummaryWriter.
 67 |     summary_op: Op for generating model summaries.
 68 |   """
 69 |   # Log model summaries on a single batch.
 70 |   summary_str = sess.run(summary_op)
 71 |   summary_writer.add_summary(summary_str, global_step)
 72 | 
 73 |   # Compute perplexity over the entire dataset.
 74 |   num_eval_batches = int(
 75 |       math.ceil(num_eval_examples / batch_size))
 76 | 
 77 |   start_time = time.time()
 78 |   sum_losses = 0.
 79 |   sum_weights = 0.
 80 |   for i in xrange(num_eval_batches):
 81 |     cross_entropy_losses, weights = sess.run([
 82 |         target_cross_entropy_losses,
 83 |         target_cross_entropy_loss_weights
 84 |     ])
 85 |     sum_losses += np.sum(cross_entropy_losses * weights)
 86 |     sum_weights += np.sum(weights)
 87 |     if not i % 100:
 88 |       tf.logging.info("Computed losses for %d of %d batches.", i + 1,
 89 |                       num_eval_batches)
 90 |   eval_time = time.time() - start_time
 91 | 
 92 |   perplexity = math.exp(sum_losses / sum_weights)
 93 |   tf.logging.info("Perplexity = %f (%.2g sec)", perplexity, eval_time)
 94 | 
 95 |   # Log perplexity to the SummaryWriter.
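  # (Here "perplexity" is exp(total cross-entropy / total word weights), i.e.
  # per-word perplexity over the whole evaluation set; the scalar is written as
  # a manually constructed tf.Summary proto rather than a graph summary op.)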
 96 |   summary = tf.Summary()
 97 |   value = summary.value.add()
 98 |   value.simple_value = perplexity
 99 |   value.tag = "Perplexity"
100 |   summary_writer.add_summary(summary, global_step)
101 | 
102 |   # Write the Events file to the eval directory.
103 |   summary_writer.flush()
104 |   tf.logging.info("Finished processing evaluation at global step %d.",
105 |                   global_step)
106 | 
107 | 
108 | def run_once(global_step, target_cross_entropy_losses, target_cross_entropy_loss_weights, saver, summary_writer, summary_op):
109 |   """Evaluates the latest model checkpoint.
110 | 
111 |   Args:
112 |     global_step: Variable holding the global step of the model checkpoint.
113 |     saver: Instance of tf.train.Saver for restoring model Variables.
114 |     summary_writer: Instance of SummaryWriter.
115 |     summary_op: Op for generating model summaries.
116 |   """
117 |   # The latest ckpt.
118 |   model_path = tf.train.latest_checkpoint(checkpoint_dir)
119 |   # print(model_path)  # /home/dsigpu4/Samba/im2txt/model/train_tl/model.ckpt-20000
120 |   # exit()
121 |   if not model_path:
122 |     tf.logging.info("Skipping evaluation. No checkpoint found in: %s",
123 |                     checkpoint_dir)
124 |     return
125 | 
126 |   with tf.Session() as sess:
127 |     # Load model from checkpoint.
128 |     tf.logging.info("Loading model from checkpoint: %s", model_path)
129 |     saver.restore(sess, model_path)
130 |     # global_step = tf.train.global_step(sess, model.global_step.name)
131 |     step = tf.train.global_step(sess, global_step.name)
132 |     tf.logging.info("Successfully loaded %s at global step = %d.",
133 |                     # os.path.basename(model_path), global_step)
134 |                     os.path.basename(model_path), step)
135 |     # if global_step < min_global_step:
136 |     if step < min_global_step:
137 |       # tf.logging.info("Skipping evaluation. Global step = %d < %d", global_step,
138 |       tf.logging.info("Skipping evaluation. Global step = %d < %d", step,
139 |                       min_global_step)
140 |       return
141 | 
142 |     # Start the queue runners.
143 |     coord = tf.train.Coordinator()
144 |     threads = tf.train.start_queue_runners(coord=coord)
145 | 
146 |     # Run evaluation on the latest checkpoint.
147 |     try:
148 |       evaluate_model(
149 |           sess=sess,
150 |           target_cross_entropy_losses=target_cross_entropy_losses,
151 |           target_cross_entropy_loss_weights=target_cross_entropy_loss_weights,
152 |           global_step=step,
153 |           summary_writer=summary_writer,
154 |           summary_op=summary_op)
155 |     except Exception as e:  # pylint: disable=broad-except
156 |       tf.logging.error("Evaluation failed.")
157 |       coord.request_stop(e)
158 | 
159 |     coord.request_stop()
160 |     coord.join(threads, stop_grace_period_secs=10)
161 | 
162 | 
163 | def run():
164 |   """Runs evaluation in a loop, and logs summaries to TensorBoard."""
165 |   # Create the evaluation directory if it doesn't exist.
166 | if not tf.gfile.IsDirectory(eval_dir): 167 | tf.logging.info("Creating eval directory: %s", eval_dir) 168 | tf.gfile.MakeDirs(eval_dir) 169 | 170 | g = tf.Graph() 171 | with g.as_default(): 172 | images, input_seqs, target_seqs, input_mask = Build_Inputs(mode, input_file_pattern) 173 | net_image_embeddings = Build_Image_Embeddings(mode, images, train_inception) 174 | net_seq_embeddings = Build_Seq_Embeddings(input_seqs) 175 | _, target_cross_entropy_losses, target_cross_entropy_loss_weights, network = \ 176 | Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, input_mask) 177 | 178 | global_step = tf.Variable( 179 | initial_value=0, 180 | dtype=tf.int32, 181 | name="global_step", 182 | trainable=False, 183 | collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.VARIABLES]) 184 | 185 | # Create the Saver to restore model Variables. 186 | saver = tf.train.Saver() 187 | 188 | # Create the summary operation and the summary writer. 189 | summary_op = tf.merge_all_summaries() 190 | summary_writer = tf.train.SummaryWriter(eval_dir) 191 | 192 | g.finalize() 193 | 194 | # Run a new evaluation run every eval_interval_secs. 195 | while True: 196 | start = time.time() 197 | tf.logging.info("Starting evaluation at " + time.strftime( 198 | "%Y-%m-%d-%H:%M:%S", time.localtime())) 199 | run_once(global_step, target_cross_entropy_losses, 200 | target_cross_entropy_loss_weights, 201 | saver, summary_writer, 202 | summary_op) 203 | time_to_next_eval = start + eval_interval_secs - time.time() 204 | if time_to_next_eval > 0: 205 | time.sleep(time_to_next_eval) 206 | 207 | 208 | def main(unused_argv): 209 | assert input_file_pattern, "--input_file_pattern is required" 210 | assert checkpoint_dir, "--checkpoint_dir is required" 211 | assert eval_dir, "--eval_dir is required" 212 | run() 213 | 214 | 215 | if __name__ == "__main__": 216 | tf.app.run() 217 | -------------------------------------------------------------------------------- /run_inference.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- coding: utf8 -*- 3 | 4 | 5 | 6 | 7 | """Generate captions for images by a given model.""" 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import math 14 | import os 15 | import numpy as np 16 | 17 | import tensorflow as tf 18 | import tensorlayer as tl 19 | from buildmodel import * 20 | 21 | DIR = '/home/lei/Documents/Workspace/models/research/im2txt/im2txt/' 22 | # Directory containing model checkpoints. 23 | CHECKPOINT_DIR = DIR+"model/train" 24 | # Vocabulary file generated by the preprocessing script. 25 | VOCAB_FILE = DIR+"data/mscoco/word_counts.txt" 26 | # JPEG image file to caption. 27 | IMAGE_FILE= DIR+"data/mscoco/raw-data/val2014/COCO_val2014_000000224477.jpg" 28 | # data/mscoco/raw-data/val2014/COCO_val2014_000000192970.jpg" 29 | # print(IMAGE_FILE) 30 | # exit() 31 | 32 | tf.logging.set_verbosity(tf.logging.INFO) # Enable tf.logging 33 | 34 | max_caption_length = 20 35 | top_k = 4 36 | print("top k:%d" % top_k) 37 | n_captions = 50 38 | 39 | def main(_): 40 | # Model checkpoint file or directory containing a model checkpoint file. 41 | checkpoint_path = CHECKPOINT_DIR 42 | # Text file containing the vocabulary. 43 | vocab_file = VOCAB_FILE 44 | # File pattern or comma-separated list of file patterns of image files. 45 | input_files = IMAGE_FILE 46 | 47 | mode = 'inference' 48 | 49 | # Build the inference graph. 
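    # (Inference is two-stage: feeding the raw JPEG through "image_feed:0"
    # initializes the LSTM state from the image embedding, then words are
    # generated one at a time via input_feed/state_feed, sampling each step
    # from the top-k of the softmax with tl.nlp.sample_top.)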
50 | g = tf.Graph() 51 | with g.as_default(): 52 | images, input_seqs, target_seqs, input_mask, input_feed = Build_Inputs(mode, input_file_pattern=None) 53 | net_image_embeddings = Build_Image_Embeddings(mode, images, train_inception=False) 54 | net_seq_embeddings = Build_Seq_Embeddings(input_seqs) 55 | softmax, net_img_rnn, net_seq_rnn, state_feed = Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, input_mask) 56 | 57 | if tf.gfile.IsDirectory(checkpoint_path): 58 | checkpoint_path = tf.train.latest_checkpoint(checkpoint_path) 59 | if not checkpoint_path: 60 | raise ValueError("No checkpoint file found in: %s" % checkpoint_path) 61 | 62 | saver = tf.train.Saver() 63 | def _restore_fn(sess): 64 | tf.logging.info("Loading model from checkpoint: %s", checkpoint_path) 65 | saver.restore(sess, checkpoint_path) 66 | tf.logging.info("Successfully loaded checkpoint: %s", 67 | os.path.basename(checkpoint_path)) 68 | 69 | restore_fn = _restore_fn 70 | g.finalize() 71 | 72 | # Create the vocabulary. 73 | vocab = tl.nlp.Vocabulary(vocab_file) 74 | 75 | filenames = [] 76 | for file_pattern in input_files.split(','): 77 | filenames.extend(tf.gfile.Glob(file_pattern.strip())) # Glob gets a list of file names which match the file_pattern 78 | 79 | tf.logging.info("Running caption generation on %d files matching %s", 80 | len(filenames), input_files) 81 | 82 | # Generate captions 83 | with tf.Session(graph=g) as sess: 84 | # Load the model from checkpoint. 85 | restore_fn(sess) 86 | for filename in filenames: 87 | with tf.gfile.GFile(filename, "r") as f: 88 | encoded_image = f.read() # it is string, haven't decode ! 89 | 90 | print(filename) 91 | init_state = sess.run(net_img_rnn.final_state,feed_dict={"image_feed:0": encoded_image}) 92 | for _ in range(n_captions): 93 | state = np.hstack((init_state.c, init_state.h)) # (1, 1024) 94 | a_id = vocab.start_id 95 | sentence = '' 96 | for _ in range(max_caption_length - 1): 97 | softmax_output, state = sess.run([softmax, net_seq_rnn.final_state], 98 | feed_dict={ input_feed : [a_id], 99 | state_feed : state, 100 | }) 101 | state = np.hstack((state.c, state.h)) 102 | a_id = tl.nlp.sample_top(softmax_output[0], top_k=top_k) 103 | word = vocab.id_to_word(a_id) 104 | if a_id == vocab.end_id: 105 | break 106 | sentence += word + ' ' 107 | 108 | print(' %s' % sentence) 109 | # print("# %s #" % sentence) 110 | 111 | 112 | if __name__ == "__main__": 113 | tf.app.run() 114 | -------------------------------------------------------------------------------- /run_inference_demo.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- coding: utf8 -*- 3 | 4 | 5 | 6 | 7 | """Generate captions for images by a given model.""" 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import math 14 | import os 15 | import numpy as np 16 | 17 | import tensorflow as tf 18 | import tensorlayer as tl 19 | from buildmodel import * 20 | 21 | # # Directory containing model checkpoints. 22 | # CHECKPOINT_DIR = "model/train" 23 | # # Vocabulary file generated by the preprocessing script. 24 | # VOCAB_FILE = "data/mscoco/word_counts.txt" 25 | # # JPEG image file to caption. 
26 | # IMAGE_FILE= "data/mscoco/raw-data/val2014/COCO_val2014_000000224477.jpg, \ 27 | # data/mscoco/raw-data/val2014/COCO_val2014_000000192970.jpg" 28 | 29 | FLAGS = tf.flags.FLAGS 30 | 31 | tf.flags.DEFINE_string("checkpoint_path", "model/train", 32 | "Model checkpoint file or directory containing a " 33 | "model checkpoint file.") 34 | tf.flags.DEFINE_string("vocab_file", "data/mscoco/word_counts.txt", "Text file containing the vocabulary.") 35 | tf.flags.DEFINE_string("input_files", "data/mscoco/raw-data/val2014/COCO_val2014_000000224477.jpg, \ 36 | data/mscoco/raw-data/val2014/COCO_val2014_000000192970.jpg", 37 | "File pattern or comma-separated list of file patterns " 38 | "of image files.") 39 | 40 | 41 | tf.logging.set_verbosity(tf.logging.INFO) # Enable tf.logging 42 | 43 | max_caption_length = 30 44 | top_k = 3 45 | print("top k:%d" % top_k) 46 | n_captions = 5 47 | 48 | def main(_): 49 | # Model checkpoint file or directory containing a model checkpoint file. 50 | checkpoint_path = FLAGS.checkpoint_path #CHECKPOINT_DIR 51 | # Text file containing the vocabulary. 52 | vocab_file = FLAGS.vocab_file #VOCAB_FILE 53 | # File pattern or comma-separated list of file patterns of image files. 54 | input_files = FLAGS.input_files #IMAGE_FILE 55 | 56 | mode = 'inference' 57 | 58 | # Build the inference graph. 59 | g = tf.Graph() 60 | with g.as_default(): 61 | images, input_seqs, target_seqs, input_mask, input_feed = Build_Inputs(mode, input_file_pattern=None) 62 | net_image_embeddings = Build_Image_Embeddings(mode, images, train_inception=False) 63 | net_seq_embeddings = Build_Seq_Embeddings(input_seqs) 64 | softmax, net_img_rnn, net_seq_rnn, state_feed = Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, input_mask) 65 | 66 | if tf.gfile.IsDirectory(checkpoint_path): 67 | checkpoint_path = tf.train.latest_checkpoint(checkpoint_path) 68 | if not checkpoint_path: 69 | raise ValueError("No checkpoint file found in: %s" % checkpoint_path) 70 | 71 | saver = tf.train.Saver() 72 | def _restore_fn(sess): 73 | tf.logging.info("Loading model from checkpoint: %s", checkpoint_path) 74 | saver.restore(sess, checkpoint_path) 75 | tf.logging.info("Successfully loaded checkpoint: %s", 76 | os.path.basename(checkpoint_path)) 77 | 78 | restore_fn = _restore_fn 79 | g.finalize() 80 | 81 | # Create the vocabulary. 82 | vocab = tl.nlp.Vocabulary(vocab_file) 83 | 84 | filenames = [] 85 | for file_pattern in input_files.split(','): 86 | filenames.extend(tf.gfile.Glob(file_pattern.strip())) # Glob gets a list of file names which match the file_pattern 87 | 88 | tf.logging.info("Running caption generation on %d files matching %s", 89 | len(filenames), input_files) 90 | 91 | # Generate captions 92 | with tf.Session(graph=g) as sess: 93 | # Load the model from checkpoint. 94 | restore_fn(sess) 95 | for filename in filenames: 96 | with tf.gfile.GFile(filename, "r") as f: 97 | encoded_image = f.read() # it is string, haven't decode ! 
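            # (i.e. raw JPEG bytes; the graph built in buildmodel.py is
            # expected to decode them when fed through "image_feed:0")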
98 | 99 | # print(filename) 100 | print("Captions for image %s:" % os.path.basename(filename)) 101 | init_state = sess.run(net_img_rnn.final_state,feed_dict={"image_feed:0": encoded_image}) 102 | for _ in range(n_captions): 103 | state = np.hstack((init_state.c, init_state.h)) # (1, 1024) 104 | a_id = vocab.start_id 105 | sentence = '' 106 | for _ in range(max_caption_length - 1): 107 | softmax_output, state = sess.run([softmax, net_seq_rnn.final_state], 108 | feed_dict={ input_feed : [a_id], 109 | state_feed : state, 110 | }) 111 | state = np.hstack((state.c, state.h)) 112 | a_id = tl.nlp.sample_top(softmax_output[0], top_k=top_k) 113 | word = vocab.id_to_word(a_id) 114 | if a_id == vocab.end_id: 115 | break 116 | sentence += word + ' ' 117 | 118 | print('# %s #' % sentence) 119 | # print("# %s #" % sentence) 120 | 121 | 122 | if __name__ == "__main__": 123 | tf.app.run() 124 | -------------------------------------------------------------------------------- /tensorlayer/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Deep learning and Reinforcement learning library for Researchers and Engineers 3 | """ 4 | # from __future__ import absolute_import 5 | 6 | 7 | try: 8 | install_instr = "Please make sure you install a recent enough version of TensorFlow." 9 | import tensorflow 10 | except ImportError: 11 | raise ImportError("__init__.py : Could not import TensorFlow." + install_instr) 12 | 13 | from . import activation 14 | act = activation 15 | from . import cost 16 | from . import files 17 | # from . import init 18 | from . import iterate 19 | from . import layers 20 | from . import ops 21 | from . import utils 22 | from . import visualize 23 | from . import prepro # was preprocesse 24 | from . import nlp 25 | from . import rein 26 | 27 | 28 | __version__ = "1.3.11" 29 | -------------------------------------------------------------------------------- /tensorlayer/activation.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- coding: utf8 -*- 3 | 4 | 5 | 6 | import tensorflow as tf 7 | 8 | def identity(x, name=None): 9 | """The identity activation function, Shortcut is ``linear``. 10 | 11 | Parameters 12 | ---------- 13 | x : a tensor input 14 | input(s) 15 | 16 | 17 | Returns 18 | -------- 19 | A `Tensor` with the same type as `x`. 20 | """ 21 | return x 22 | 23 | # Shortcut 24 | linear = identity 25 | 26 | def ramp(x=None, v_min=0, v_max=1, name=None): 27 | """The ramp activation function. 28 | 29 | Parameters 30 | ---------- 31 | x : a tensor input 32 | input(s) 33 | v_min : float 34 | if input(s) smaller than v_min, change inputs to v_min 35 | v_max : float 36 | if input(s) greater than v_max, change inputs to v_max 37 | name : a string or None 38 | An optional name to attach to this activation function. 39 | 40 | 41 | Returns 42 | -------- 43 | A `Tensor` with the same type as `x`. 44 | """ 45 | return tf.clip_by_value(x, clip_value_min=v_min, clip_value_max=v_max, name=name) 46 | 47 | def leaky_relu(x=None, alpha=0.1, name="LeakyReLU"): 48 | """The LeakyReLU, Shortcut is ``lrelu``. 49 | 50 | Modified version of ReLU, introducing a nonzero gradient for negative 51 | input. 52 | 53 | Parameters 54 | ---------- 55 | x : A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`, 56 | `int16`, or `int8`. 57 | alpha : `float`. slope. 58 | name : a string or None 59 | An optional name to attach to this activation function. 
60 | 61 | Examples 62 | --------- 63 | >>> network = tl.layers.DenseLayer(network, n_units=100, name = 'dense_lrelu', 64 | ... act= lambda x : tl.act.lrelu(x, 0.2)) 65 | 66 | References 67 | ------------ 68 | - `Rectifier Nonlinearities Improve Neural Network Acoustic Models, Maas et al. (2013) `_ 69 | """ 70 | with tf.name_scope(name) as scope: 71 | # x = tf.nn.relu(x) 72 | # m_x = tf.nn.relu(-x) 73 | # x -= alpha * m_x 74 | x = tf.maximum(x, alpha * x) 75 | return x 76 | 77 | #Shortcut 78 | lrelu = leaky_relu 79 | 80 | def pixel_wise_softmax(output, name='pixel_wise_softmax'): 81 | """Return the softmax outputs of images, every pixels have multiple label, the sum of a pixel is 1. 82 | Usually be used for image segmentation. 83 | 84 | Parameters 85 | ------------ 86 | output : tensor 87 | - For 2d image, 4D tensor [batch_size, height, weight, channel], channel >= 2. 88 | - For 3d image, 5D tensor [batch_size, depth, height, weight, channel], channel >= 2. 89 | 90 | Examples 91 | --------- 92 | >>> outputs = pixel_wise_softmax(network.outputs) 93 | >>> dice_loss = 1 - dice_coe(outputs, y_, epsilon=1e-5) 94 | 95 | References 96 | ----------- 97 | - `tf.reverse `_ 98 | """ 99 | with tf.name_scope(name) as scope: 100 | return tf.nn.softmax(output) 101 | ## old implementation 102 | # exp_map = tf.exp(output) 103 | # if output.get_shape().ndims == 4: # 2d image 104 | # evidence = tf.add(exp_map, tf.reverse(exp_map, [False, False, False, True])) 105 | # elif output.get_shape().ndims == 5: # 3d image 106 | # evidence = tf.add(exp_map, tf.reverse(exp_map, [False, False, False, False, True])) 107 | # else: 108 | # raise Exception("output parameters should be 2d or 3d image, not %s" % str(output._shape)) 109 | # return tf.div(exp_map, evidence) 110 | -------------------------------------------------------------------------------- /tensorlayer/iterate.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- coding: utf8 -*- 3 | 4 | 5 | 6 | import numpy as np 7 | from six.moves import xrange 8 | 9 | def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False): 10 | """Generate a generator that input a group of example in numpy.array and 11 | their labels, return the examples and labels by the given batchsize. 12 | 13 | Parameters 14 | ---------- 15 | inputs : numpy.array 16 | (X) The input features, every row is a example. 17 | targets : numpy.array 18 | (y) The labels of inputs, every row is a example. 19 | batch_size : int 20 | The batch size. 21 | shuffle : boolean 22 | Indicating whether to use a shuffling queue, shuffle the dataset before return. 23 | 24 | Examples 25 | -------- 26 | >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']]) 27 | >>> y = np.asarray([0,1,2,3,4,5]) 28 | >>> for batch in tl.iterate.minibatches(inputs=X, targets=y, batch_size=2, shuffle=False): 29 | >>> print(batch) 30 | ... (array([['a', 'a'], 31 | ... ['b', 'b']], 32 | ... dtype='>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']]) 59 | >>> y = np.asarray([0, 1, 2, 3, 4, 5]) 60 | >>> for batch in tl.iterate.seq_minibatches(inputs=X, targets=y, batch_size=2, seq_length=2, stride=1): 61 | >>> print(batch) 62 | ... (array([['a', 'a'], 63 | ... ['b', 'b'], 64 | ... ['b', 'b'], 65 | ... ['c', 'c']], 66 | ... 
dtype='>> return_last = True 77 | >>> num_steps = 2 78 | >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']]) 79 | >>> Y = np.asarray([0,1,2,3,4,5]) 80 | >>> for batch in tl.iterate.seq_minibatches(inputs=X, targets=Y, batch_size=2, seq_length=num_steps, stride=1): 81 | >>> x, y = batch 82 | >>> if return_last: 83 | >>> tmp_y = y.reshape((-1, num_steps) + y.shape[1:]) 84 | >>> y = tmp_y[:, -1] 85 | >>> print(x, y) 86 | ... [['a' 'a'] 87 | ... ['b' 'b'] 88 | ... ['b' 'b'] 89 | ... ['c' 'c']] [1 2] 90 | ... [['c' 'c'] 91 | ... ['d' 'd'] 92 | ... ['d' 'd'] 93 | ... ['e' 'e']] [3 4] 94 | """ 95 | assert len(inputs) == len(targets) 96 | n_loads = (batch_size * stride) + (seq_length - stride) 97 | for start_idx in range(0, len(inputs) - n_loads + 1, (batch_size * stride)): 98 | seq_inputs = np.zeros((batch_size, seq_length) + inputs.shape[1:], 99 | dtype=inputs.dtype) 100 | seq_targets = np.zeros((batch_size, seq_length) + targets.shape[1:], 101 | dtype=targets.dtype) 102 | for b_idx in xrange(batch_size): 103 | start_seq_idx = start_idx + (b_idx * stride) 104 | end_seq_idx = start_seq_idx + seq_length 105 | seq_inputs[b_idx] = inputs[start_seq_idx:end_seq_idx] 106 | seq_targets[b_idx] = targets[start_seq_idx:end_seq_idx] 107 | flatten_inputs = seq_inputs.reshape((-1,) + inputs.shape[1:]) 108 | flatten_targets = seq_targets.reshape((-1,) + targets.shape[1:]) 109 | yield flatten_inputs, flatten_targets 110 | 111 | def seq_minibatches2(inputs, targets, batch_size, num_steps): 112 | """Generate a generator that iterates on two list of words. Yields (Returns) the source contexts and 113 | the target context by the given batch_size and num_steps (sequence_length), 114 | see ``PTB tutorial``. In TensorFlow's tutorial, this generates the batch_size pointers into the raw 115 | PTB data, and allows minibatch iteration along these pointers. 116 | 117 | - Hint, if the input data are images, you can modify the code as follow. 118 | 119 | .. code-block:: python 120 | 121 | from 122 | data = np.zeros([batch_size, batch_len) 123 | to 124 | data = np.zeros([batch_size, batch_len, inputs.shape[1], inputs.shape[2], inputs.shape[3]]) 125 | 126 | Parameters 127 | ---------- 128 | inputs : a list 129 | the context in list format; note that context usually be 130 | represented by splitting by space, and then convert to unique 131 | word IDs. 132 | targets : a list 133 | the context in list format; note that context usually be 134 | represented by splitting by space, and then convert to unique 135 | word IDs. 136 | batch_size : int 137 | the batch size. 138 | num_steps : int 139 | the number of unrolls. i.e. sequence_length 140 | 141 | Yields 142 | ------ 143 | Pairs of the batched data, each a matrix of shape [batch_size, num_steps]. 144 | 145 | Raises 146 | ------ 147 | ValueError : if batch_size or num_steps are too high. 148 | 149 | Examples 150 | -------- 151 | >>> X = [i for i in range(20)] 152 | >>> Y = [i for i in range(20,40)] 153 | >>> for batch in tl.iterate.seq_minibatches2(X, Y, batch_size=2, num_steps=3): 154 | ... x, y = batch 155 | ... print(x, y) 156 | ... 157 | ... [[ 0. 1. 2.] 158 | ... [ 10. 11. 12.]] 159 | ... [[ 20. 21. 22.] 160 | ... [ 30. 31. 32.]] 161 | ... 162 | ... [[ 3. 4. 5.] 163 | ... [ 13. 14. 15.]] 164 | ... [[ 23. 24. 25.] 165 | ... [ 33. 34. 35.]] 166 | ... 167 | ... [[ 6. 7. 8.] 168 | ... [ 16. 17. 18.]] 169 | ... [[ 26. 27. 28.] 170 | ... [ 36. 37. 
38.]] 171 | 172 | Code References 173 | --------------- 174 | - ``tensorflow/models/rnn/ptb/reader.py`` 175 | """ 176 | assert len(inputs) == len(targets) 177 | data_len = len(inputs) 178 | batch_len = data_len // batch_size 179 | # data = np.zeros([batch_size, batch_len]) 180 | data = np.zeros((batch_size, batch_len) + inputs.shape[1:], 181 | dtype=inputs.dtype) 182 | data2 = np.zeros([batch_size, batch_len]) 183 | 184 | for i in range(batch_size): 185 | data[i] = inputs[batch_len * i:batch_len * (i + 1)] 186 | data2[i] = targets[batch_len * i:batch_len * (i + 1)] 187 | 188 | epoch_size = (batch_len - 1) // num_steps 189 | 190 | if epoch_size == 0: 191 | raise ValueError("epoch_size == 0, decrease batch_size or num_steps") 192 | 193 | for i in range(epoch_size): 194 | x = data[:, i*num_steps:(i+1)*num_steps] 195 | x2 = data2[:, i*num_steps:(i+1)*num_steps] 196 | yield (x, x2) 197 | 198 | 199 | def ptb_iterator(raw_data, batch_size, num_steps): 200 | """ 201 | Generate a generator that iterates on a list of words, see PTB tutorial. Yields (Returns) the source contexts and 202 | the target context by the given batch_size and num_steps (sequence_length).\n 203 | see ``PTB tutorial``. 204 | 205 | e.g. x = [0, 1, 2] y = [1, 2, 3] , when batch_size = 1, num_steps = 3, 206 | raw_data = [i for i in range(100)] 207 | 208 | In TensorFlow's tutorial, this generates batch_size pointers into the raw 209 | PTB data, and allows minibatch iteration along these pointers. 210 | 211 | Parameters 212 | ---------- 213 | raw_data : a list 214 | the context in list format; note that context usually be 215 | represented by splitting by space, and then convert to unique 216 | word IDs. 217 | batch_size : int 218 | the batch size. 219 | num_steps : int 220 | the number of unrolls. i.e. sequence_length 221 | 222 | Yields 223 | ------ 224 | Pairs of the batched data, each a matrix of shape [batch_size, num_steps]. 225 | The second element of the tuple is the same data time-shifted to the 226 | right by one. 227 | 228 | Raises 229 | ------ 230 | ValueError : if batch_size or num_steps are too high. 231 | 232 | Examples 233 | -------- 234 | >>> train_data = [i for i in range(20)] 235 | >>> for batch in tl.iterate.ptb_iterator(train_data, batch_size=2, num_steps=3): 236 | >>> x, y = batch 237 | >>> print(x, y) 238 | ... [[ 0 1 2] <---x 1st subset/ iteration 239 | ... [10 11 12]] 240 | ... [[ 1 2 3] <---y 241 | ... [11 12 13]] 242 | ... 243 | ... [[ 3 4 5] <--- 1st batch input 2nd subset/ iteration 244 | ... [13 14 15]] <--- 2nd batch input 245 | ... [[ 4 5 6] <--- 1st batch target 246 | ... [14 15 16]] <--- 2nd batch target 247 | ... 248 | ... [[ 6 7 8] 3rd subset/ iteration 249 | ... [16 17 18]] 250 | ... [[ 7 8 9] 251 | ... 
[17 18 19]] 252 | 253 | Code References 254 | ---------------- 255 | - ``tensorflow/models/rnn/ptb/reader.py`` 256 | """ 257 | raw_data = np.array(raw_data, dtype=np.int32) 258 | 259 | data_len = len(raw_data) 260 | batch_len = data_len // batch_size 261 | data = np.zeros([batch_size, batch_len], dtype=np.int32) 262 | for i in range(batch_size): 263 | data[i] = raw_data[batch_len * i:batch_len * (i + 1)] 264 | 265 | epoch_size = (batch_len - 1) // num_steps 266 | 267 | if epoch_size == 0: 268 | raise ValueError("epoch_size == 0, decrease batch_size or num_steps") 269 | 270 | for i in range(epoch_size): 271 | x = data[:, i*num_steps:(i+1)*num_steps] 272 | y = data[:, i*num_steps+1:(i+1)*num_steps+1] 273 | yield (x, y) 274 | 275 | 276 | 277 | # def minibatches_for_sequence2D(inputs, targets, batch_size, sequence_length, stride=1): 278 | # """ 279 | # Input a group of example in 2D numpy.array and their labels. 280 | # Return the examples and labels by the given batchsize, sequence_length. 281 | # Use for RNN. 282 | # 283 | # Parameters 284 | # ---------- 285 | # inputs : numpy.array 286 | # (X) The input features, every row is a example. 287 | # targets : numpy.array 288 | # (y) The labels of inputs, every row is a example. 289 | # batchsize : int 290 | # The batch size must be a multiple of sequence_length: int(batch_size % sequence_length) == 0 291 | # sequence_length : int 292 | # The sequence length 293 | # stride : int 294 | # The stride step 295 | # 296 | # Examples 297 | # -------- 298 | # >>> sequence_length = 2 299 | # >>> batch_size = 4 300 | # >>> stride = 1 301 | # >>> X_train = np.asarray([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15],[16,17,18],[19,20,21],[22,23,24]]) 302 | # >>> y_train = np.asarray(['0','1','2','3','4','5','6','7']) 303 | # >>> print('X_train = %s' % X_train) 304 | # >>> print('y_train = %s' % y_train) 305 | # >>> for batch in minibatches_for_sequence2D(X_train, y_train, batch_size=batch_size, sequence_length=sequence_length, stride=stride): 306 | # >>> inputs, targets = batch 307 | # >>> print(inputs) 308 | # >>> print(targets) 309 | # ... [[ 1. 2. 3.] 310 | # ... [ 4. 5. 6.] 311 | # ... [ 4. 5. 6.] 312 | # ... [ 7. 8. 9.]] 313 | # ... [1 2] 314 | # ... [[ 4. 5. 6.] 315 | # ... [ 7. 8. 9.] 316 | # ... [ 7. 8. 9.] 317 | # ... [ 10. 11. 12.]] 318 | # ... [2 3] 319 | # ... ... 320 | # ... [[ 16. 17. 18.] 321 | # ... [ 19. 20. 21.] 322 | # ... [ 19. 20. 21.] 323 | # ... [ 22. 23. 24.]] 324 | # ... 
[6 7] 325 | # """ 326 | # print('len(targets)=%d batch_size=%d sequence_length=%d stride=%d' % (len(targets), batch_size, sequence_length, stride)) 327 | # assert len(inputs) == len(targets), '1 feature vector have 1 target vector/value' #* sequence_length 328 | # # assert int(batch_size % sequence_length) == 0, 'batch_size % sequence_length must == 0\ 329 | # # batch_size is number of examples rather than number of targets' 330 | # 331 | # # print(inputs.shape, len(inputs), len(inputs[0])) 332 | # 333 | # n_targets = int(batch_size/sequence_length) 334 | # # n_targets = int(np.ceil(batch_size/sequence_length)) 335 | # X = np.empty(shape=(0,len(inputs[0])), dtype=np.float32) 336 | # y = np.zeros(shape=(1, n_targets), dtype=np.int32) 337 | # 338 | # for idx in range(sequence_length, len(inputs), stride): # go through all example during 1 epoch 339 | # for n in range(n_targets): # for num of target 340 | # X = np.concatenate((X, inputs[idx-sequence_length+n:idx+n])) 341 | # y[0][n] = targets[idx-1+n] 342 | # # y = np.vstack((y, targets[idx-1+n])) 343 | # yield X, y[0] 344 | # X = np.empty(shape=(0,len(inputs[0]))) 345 | # # y = np.empty(shape=(1,0)) 346 | # 347 | # 348 | # def minibatches_for_sequence4D(inputs, targets, batch_size, sequence_length, stride=1): # 349 | # """ 350 | # Input a group of example in 4D numpy.array and their labels. 351 | # Return the examples and labels by the given batchsize, sequence_length. 352 | # Use for RNN. 353 | # 354 | # Parameters 355 | # ---------- 356 | # inputs : numpy.array 357 | # (X) The input features, every row is a example. 358 | # targets : numpy.array 359 | # (y) The labels of inputs, every row is a example. 360 | # batchsize : int 361 | # The batch size must be a multiple of sequence_length: int(batch_size % sequence_length) == 0 362 | # sequence_length : int 363 | # The sequence length 364 | # stride : int 365 | # The stride step 366 | # 367 | # Examples 368 | # -------- 369 | # >>> sequence_length = 2 370 | # >>> batch_size = 2 371 | # >>> stride = 1 372 | # >>> X_train = np.asarray([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15],[16,17,18],[19,20,21],[22,23,24]]) 373 | # >>> y_train = np.asarray(['0','1','2','3','4','5','6','7']) 374 | # >>> X_train = np.expand_dims(X_train, axis=1) 375 | # >>> X_train = np.expand_dims(X_train, axis=3) 376 | # >>> for batch in minibatches_for_sequence4D(X_train, y_train, batch_size=batch_size, sequence_length=sequence_length, stride=stride): 377 | # >>> inputs, targets = batch 378 | # >>> print(inputs) 379 | # >>> print(targets) 380 | # ... [[[[ 1.] 381 | # ... [ 2.] 382 | # ... [ 3.]]] 383 | # ... [[[ 4.] 384 | # ... [ 5.] 385 | # ... [ 6.]]]] 386 | # ... [1] 387 | # ... [[[[ 4.] 388 | # ... [ 5.] 389 | # ... [ 6.]]] 390 | # ... [[[ 7.] 391 | # ... [ 8.] 392 | # ... [ 9.]]]] 393 | # ... [2] 394 | # ... ... 395 | # ... [[[[ 19.] 396 | # ... [ 20.] 397 | # ... [ 21.]]] 398 | # ... [[[ 22.] 399 | # ... [ 23.] 400 | # ... [ 24.]]]] 401 | # ... 
[7] 402 | # """ 403 | # print('len(targets)=%d batch_size=%d sequence_length=%d stride=%d' % (len(targets), batch_size, sequence_length, stride)) 404 | # assert len(inputs) == len(targets), '1 feature vector have 1 target vector/value' #* sequence_length 405 | # # assert int(batch_size % sequence_length) == 0, 'in LSTM, batch_size % sequence_length must == 0\ 406 | # # batch_size is number of X_train rather than number of targets' 407 | # assert stride >= 1, 'stride must be >=1, at least move 1 step for each iternation' 408 | # 409 | # n_example, n_channels, width, height = inputs.shape 410 | # print('n_example=%d n_channels=%d width=%d height=%d' % (n_example, n_channels, width, height)) 411 | # 412 | # n_targets = int(np.ceil(batch_size/sequence_length)) # 实际为 batchsize/sequence_length + 1 413 | # print(n_targets) 414 | # X = np.zeros(shape=(batch_size, n_channels, width, height), dtype=np.float32) 415 | # # X = np.zeros(shape=(n_targets, sequence_length, n_channels, width, height), dtype=np.float32) 416 | # y = np.zeros(shape=(1,n_targets), dtype=np.int32) 417 | # # y = np.empty(shape=(0,1), dtype=np.float32) 418 | # # time.sleep(2) 419 | # for idx in range(sequence_length, n_example-n_targets+2, stride): # go through all example during 1 epoch 420 | # for n in range(n_targets): # for num of target 421 | # # print(idx+n, inputs[idx-sequence_length+n : idx+n].shape) 422 | # X[n*sequence_length : (n+1)*sequence_length] = inputs[idx+n-sequence_length : idx+n] 423 | # # X[n] = inputs[idx-sequence_length+n:idx+n] 424 | # y[0][n] = targets[idx+n-1] 425 | # # y = np.vstack((y, targets[idx-1+n])) 426 | # # y = targets[idx: idx+n_targets] 427 | # yield X, y[0] 428 | -------------------------------------------------------------------------------- /tensorlayer/ops.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- coding: utf8 -*- 3 | 4 | 5 | 6 | 7 | import tensorflow as tf 8 | import os 9 | import sys 10 | from sys import platform as _platform 11 | 12 | 13 | def exit_tf(sess=None): 14 | """Close tensorboard and nvidia-process if available 15 | 16 | Parameters 17 | ---------- 18 | sess : a session instance of TensorFlow 19 | TensorFlow session 20 | """ 21 | text = "[tl] Close tensorboard and nvidia-process if available" 22 | sess.close() 23 | # import time 24 | # time.sleep(2) 25 | if _platform == "linux" or _platform == "linux2": 26 | print('linux: %s' % text) 27 | os.system('nvidia-smi') 28 | os.system('fuser 6006/tcp -k') # kill tensorboard 6006 29 | os.system("nvidia-smi | grep python |awk '{print $3}'|xargs kill") # kill all nvidia-smi python process 30 | elif _platform == "darwin": 31 | print('OS X: %s' % text) 32 | os.system("lsof -i tcp:6006 | grep -v PID | awk '{print $2}' | xargs kill") # kill tensorboard 6006 33 | elif _platform == "win32": 34 | print('Windows: %s' % text) 35 | else: 36 | print(_platform) 37 | exit() 38 | 39 | def clear_all(printable=True): 40 | """Clears all the placeholder variables of keep prob, 41 | including keeping probabilities of all dropout, denoising, dropconnect etc. 42 | 43 | Parameters 44 | ---------- 45 | printable : boolean 46 | If True, print all deleted variables. 
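    Examples
    --------
    A minimal usage sketch; note that this operates on module-level globals:

    >>> tl.ops.clear_all(printable=True)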
47 | """ 48 | print('clear all .....................................') 49 | gl = globals().copy() 50 | for var in gl: 51 | if var[0] == '_': continue 52 | if 'func' in str(globals()[var]): continue 53 | if 'module' in str(globals()[var]): continue 54 | if 'class' in str(globals()[var]): continue 55 | 56 | if printable: 57 | print(" clear_all ------- %s" % str(globals()[var])) 58 | 59 | del globals()[var] 60 | 61 | # def clear_all2(vars, printable=True): 62 | # """ 63 | # The :function:`clear_all()` Clears all the placeholder variables of keep prob, 64 | # including keeping probabilities of all dropout, denoising, dropconnect 65 | # Parameters 66 | # ---------- 67 | # printable : if True, print all deleted variables. 68 | # """ 69 | # print('clear all .....................................') 70 | # for var in vars: 71 | # if var[0] == '_': continue 72 | # if 'func' in str(var): continue 73 | # if 'module' in str(var): continue 74 | # if 'class' in str(var): continue 75 | # 76 | # if printable: 77 | # print(" clear_all ------- %s" % str(var)) 78 | # 79 | # del var 80 | 81 | def set_gpu_fraction(sess=None, gpu_fraction=0.3): 82 | """Set the GPU memory fraction for the application. 83 | 84 | Parameters 85 | ---------- 86 | sess : a session instance of TensorFlow 87 | TensorFlow session 88 | gpu_fraction : a float 89 | Fraction of GPU memory, (0 ~ 1] 90 | 91 | References 92 | ---------- 93 | - `TensorFlow using GPU `_ 94 | """ 95 | print(" tensorlayer: GPU MEM Fraction %f" % gpu_fraction) 96 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction) 97 | sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options)) 98 | return sess 99 | 100 | 101 | 102 | 103 | 104 | def disable_print(): 105 | """Disable console output, ``suppress_stdout`` is recommended. 106 | 107 | Examples 108 | --------- 109 | >>> print("You can see me") 110 | >>> tl.ops.disable_print() 111 | >>> print(" You can't see me") 112 | >>> tl.ops.enable_print() 113 | >>> print("You can see me") 114 | """ 115 | # sys.stdout = os.devnull # this one kill the process 116 | sys.stdout = None 117 | sys.stderr = os.devnull 118 | 119 | def enable_print(): 120 | """Enable console output, ``suppress_stdout`` is recommended. 121 | 122 | Examples 123 | -------- 124 | - see tl.ops.disable_print() 125 | """ 126 | sys.stdout = sys.__stdout__ 127 | sys.stderr = sys.__stderr__ 128 | 129 | 130 | # class temporary_disable_print: 131 | # """Temporarily disable console output. 132 | # 133 | # Examples 134 | # --------- 135 | # >>> print("You can see me") 136 | # >>> with tl.ops.temporary_disable_print() as t: 137 | # >>> print("You can't see me") 138 | # >>> print("You can see me") 139 | # """ 140 | # def __init__(self): 141 | # pass 142 | # def __enter__(self): 143 | # sys.stdout = None 144 | # sys.stderr = os.devnull 145 | # def __exit__(self, type, value, traceback): 146 | # sys.stdout = sys.__stdout__ 147 | # sys.stderr = sys.__stderr__ 148 | # return isinstance(value, TypeError) 149 | 150 | 151 | from contextlib import contextmanager 152 | @contextmanager 153 | def suppress_stdout(): 154 | """Temporarily disable console output. 
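    Internally, ``sys.stdout`` is redirected to ``os.devnull`` for the duration
    of the ``with`` block and restored in a ``finally`` clause, so normal
    output resumes even if an exception is raised inside the block.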
155 | 156 | Examples 157 | --------- 158 | >>> print("You can see me") 159 | >>> with tl.ops.suppress_stdout(): 160 | >>> print("You can't see me") 161 | >>> print("You can see me") 162 | 163 | References 164 | ----------- 165 | - `stackoverflow `_ 166 | """ 167 | with open(os.devnull, "w") as devnull: 168 | old_stdout = sys.stdout 169 | sys.stdout = devnull 170 | try: 171 | yield 172 | finally: 173 | sys.stdout = old_stdout 174 | 175 | 176 | 177 | def get_site_packages_directory(): 178 | """Print and return the site-packages directory. 179 | 180 | Examples 181 | --------- 182 | >>> loc = tl.ops.get_site_packages_directory() 183 | """ 184 | import site 185 | try: 186 | loc = site.getsitepackages() 187 | print(" tl.ops : site-packages in ", loc) 188 | return loc 189 | except: 190 | print(" tl.ops : Cannot find package dir from virtual environment") 191 | return False 192 | 193 | 194 | 195 | def empty_trash(): 196 | """Empty trash folder. 197 | 198 | """ 199 | text = "[tl] Empty the trash" 200 | if _platform == "linux" or _platform == "linux2": 201 | print('linux: %s' % text) 202 | os.system("rm -rf ~/.local/share/Trash/*") 203 | elif _platform == "darwin": 204 | print('OS X: %s' % text) 205 | os.system("sudo rm -rf ~/.Trash/*") 206 | elif _platform == "win32": 207 | print('Windows: %s' % text) 208 | try: 209 | os.system("rd /s c:\$Recycle.Bin") # Windows 7 or Server 2008 210 | except: 211 | pass 212 | try: 213 | os.system("rd /s c:\recycler") # Windows XP, Vista, or Server 2003 214 | except: 215 | pass 216 | else: 217 | print(_platform) 218 | 219 | # 220 | -------------------------------------------------------------------------------- /tensorlayer/rein.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- coding: utf8 -*- 3 | 4 | 5 | 6 | import tensorflow as tf 7 | import numpy as np 8 | from six.moves import xrange 9 | 10 | def discount_episode_rewards(rewards=[], gamma=0.99): 11 | """ Take 1D float array of rewards and compute discounted rewards for an 12 | episode. When encount a non-zero value, consider as the end a of an episode. 13 | 14 | Parameters 15 | ---------- 16 | rewards : numpy list 17 | a list of rewards 18 | gamma : float 19 | discounted factor 20 | 21 | Examples 22 | ---------- 23 | >>> rewards = np.asarray([0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1]) 24 | >>> gamma = 0.9 25 | >>> discount_rewards = tl.rein.discount_episode_rewards(rewards, gamma) 26 | >>> print(discount_rewards) 27 | ... [ 0.72899997 0.81 0.89999998 1. 0.72899997 0.81 28 | ... 0.89999998 1. 0.72899997 0.81 0.89999998 1. ] 29 | """ 30 | discounted_r = np.zeros_like(rewards, dtype=np.float32) 31 | running_add = 0 32 | for t in reversed(xrange(0, rewards.size)): 33 | if rewards[t] != 0: running_add = 0 34 | 35 | running_add = running_add * gamma + rewards[t] 36 | discounted_r[t] = running_add 37 | return discounted_r 38 | 39 | 40 | def cross_entropy_reward_loss(logits, actions, rewards, name=None): 41 | """ Calculate the loss for Policy Gradient Network. 42 | 43 | Parameters 44 | ---------- 45 | logits : tensor 46 | The network outputs without softmax. This function implements softmax 47 | inside. 48 | actions : tensor/ placeholder 49 | The agent actions. 50 | rewards : tensor/ placeholder 51 | The rewards. 
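    Notes
    -----
    The discounted rewards act as per-step weights on the cross-entropy terms:
    ``loss = sum(cross_entropy * rewards)``, so actions that were followed by
    high cumulative reward are reinforced and actions followed by negative
    reward are discouraged.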

    Examples
    ----------
    >>> states_batch_pl = tf.placeholder(tf.float32, shape=[None, D])   # observation for training
    >>> network = tl.layers.InputLayer(states_batch_pl, name='input_layer')
    >>> network = tl.layers.DenseLayer(network, n_units=H, act = tf.nn.relu, name='relu1')
    >>> network = tl.layers.DenseLayer(network, n_units=3, act = tl.activation.identity, name='output_layer')
    >>> probs = network.outputs
    >>> sampling_prob = tf.nn.softmax(probs)
    >>> actions_batch_pl = tf.placeholder(tf.int32, shape=[None])
    >>> discount_rewards_batch_pl = tf.placeholder(tf.float32, shape=[None])
    >>> loss = cross_entropy_reward_loss(probs, actions_batch_pl, discount_rewards_batch_pl)
    >>> train_op = tf.train.RMSPropOptimizer(learning_rate, decay_rate).minimize(loss)
    """
    try:    # TF 1.0
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=actions, logits=logits, name=name)
    except:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, targets=actions)
        # cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, actions)

    try:    ## TF 1.0
        loss = tf.reduce_sum(tf.multiply(cross_entropy, rewards))
    except: ## TF 0.12
        loss = tf.reduce_sum(tf.mul(cross_entropy, rewards))    # element-wise mul
    return loss
--------------------------------------------------------------------------------
/tensorlayer/utils.py:
--------------------------------------------------------------------------------
#! /usr/bin/python
# -*- coding: utf8 -*-
import tensorflow as tf
import tensorlayer as tl
from . import iterate
import numpy as np
import time
import math


def fit(sess, network, train_op, cost, X_train, y_train, x, y_, acc=None, batch_size=100,
        n_epoch=100, print_freq=5, X_val=None, y_val=None, eval_train=True,
        tensorboard=False, tensorboard_epoch_freq=5, tensorboard_weight_histograms=True, tensorboard_graph_vis=True):
    """Train a given non-time-series network with the given cost function, training data, batch_size, n_epoch etc.

    Parameters
    ----------
    sess : TensorFlow session
        sess = tf.InteractiveSession()
    network : a TensorLayer layer
        the network to be trained
    train_op : a TensorFlow optimizer
        like tf.train.AdamOptimizer
    cost : a TensorFlow expression
        the cost (loss) to be minimized
    X_train : numpy array
        the input of training data
    y_train : numpy array
        the target of training data
    x : placeholder
        for inputs
    y_ : placeholder
        for targets
    acc : the TensorFlow expression of accuracy (or other metric) or None
        if None, the metric is not displayed
    batch_size : int
        batch size for training and evaluating
    n_epoch : int
        the number of training epochs
    print_freq : int
        display the training information every ``print_freq`` epochs
    X_val : numpy array or None
        the input of validation data
    y_val : numpy array or None
        the target of validation data
    eval_train : boolean
        if X_val and y_val are not None, it determines whether to also evaluate on the training data
    tensorboard : boolean
        if True, summary data will be stored to the logs/ directory for visualization with tensorboard
        (default False). See also the detailed tensorboard_X settings for specific configurations of features.
        Also runs tl.layers.initialize_global_variables(sess) internally in fit() to set up the summary nodes, see Note:
    tensorboard_epoch_freq : int
        how many epochs between storing tensorboard checkpoints for visualization to the logs/ directory (default 5)
    tensorboard_weight_histograms : boolean
        if True, updates tensorboard data in the logs/ directory for visualization
        of the weight histograms every tensorboard_epoch_freq epochs (default True)
    tensorboard_graph_vis : boolean
        if True, stores the graph in the tensorboard summaries saved to logs/ (default True)

    Examples
    --------
    >>> see tutorial_mnist_simple.py
    >>> tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_,
    ...            acc=acc, batch_size=500, n_epoch=200, print_freq=5,
    ...            X_val=X_val, y_val=y_val, eval_train=False)
    >>> tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_,
    ...            acc=acc, batch_size=500, n_epoch=200, print_freq=5,
    ...            X_val=X_val, y_val=y_val, eval_train=False,
    ...            tensorboard=True, tensorboard_weight_histograms=True, tensorboard_graph_vis=True)

    Note
    --------
    If tensorboard=True, the global_variables_initializer will be run inside the fit function
    in order to initialize the automatically generated summary nodes used for tensorboard visualization;
    any tf.global_variables_initializer().run() executed before the fit() call will therefore be overridden.
    """
    assert X_train.shape[0] >= batch_size, "Number of training examples should be bigger than the batch size"

    if tensorboard:
        print("Setting up tensorboard ...")
        # Set up tensorboard summaries and saver
        tl.files.exists_or_mkdir('logs/')

        # Only write summaries for more recent TensorFlow versions
        if hasattr(tf, 'summary') and hasattr(tf.summary, 'FileWriter'):
            if tensorboard_graph_vis:
                train_writer = tf.summary.FileWriter('logs/train', sess.graph)
                val_writer = tf.summary.FileWriter('logs/validation', sess.graph)
            else:
                train_writer = tf.summary.FileWriter('logs/train')
                val_writer = tf.summary.FileWriter('logs/validation')

        # Set up summary nodes
        if tensorboard_weight_histograms:
            for param in network.all_params:
                if hasattr(tf, 'summary') and hasattr(tf.summary, 'histogram'):
                    print('Param name ', param.name)
                    tf.summary.histogram(param.name, param)

        if hasattr(tf, 'summary') and hasattr(tf.summary, 'histogram'):
            tf.summary.scalar('cost', cost)

        merged = tf.summary.merge_all()

        # Initialize all variables and summaries
        tl.layers.initialize_global_variables(sess)
        print("Finished!
use $tensorboard --logdir=logs/ to start server") 105 | 106 | print("Start training the network ...") 107 | start_time_begin = time.time() 108 | tensorboard_train_index, tensorboard_val_index = 0, 0 109 | for epoch in range(n_epoch): 110 | start_time = time.time() 111 | loss_ep = 0; n_step = 0 112 | for X_train_a, y_train_a in iterate.minibatches(X_train, y_train, 113 | batch_size, shuffle=True): 114 | feed_dict = {x: X_train_a, y_: y_train_a} 115 | feed_dict.update( network.all_drop ) # enable noise layers 116 | loss, _ = sess.run([cost, train_op], feed_dict=feed_dict) 117 | loss_ep += loss 118 | n_step += 1 119 | loss_ep = loss_ep/ n_step 120 | 121 | if tensorboard and hasattr(tf, 'summary'): 122 | if epoch+1 == 1 or (epoch+1) % tensorboard_epoch_freq == 0: 123 | for X_train_a, y_train_a in iterate.minibatches( 124 | X_train, y_train, batch_size, shuffle=True): 125 | dp_dict = dict_to_one( network.all_drop ) # disable noise layers 126 | feed_dict = {x: X_train_a, y_: y_train_a} 127 | feed_dict.update(dp_dict) 128 | result = sess.run(merged, feed_dict=feed_dict) 129 | train_writer.add_summary(result, tensorboard_train_index) 130 | tensorboard_train_index += 1 131 | 132 | for X_val_a, y_val_a in iterate.minibatches( 133 | X_val, y_val, batch_size, shuffle=True): 134 | dp_dict = dict_to_one( network.all_drop ) # disable noise layers 135 | feed_dict = {x: X_val_a, y_: y_val_a} 136 | feed_dict.update(dp_dict) 137 | result = sess.run(merged, feed_dict=feed_dict) 138 | val_writer.add_summary(result, tensorboard_val_index) 139 | tensorboard_val_index += 1 140 | 141 | if epoch + 1 == 1 or (epoch + 1) % print_freq == 0: 142 | if (X_val is not None) and (y_val is not None): 143 | print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time)) 144 | if eval_train is True: 145 | train_loss, train_acc, n_batch = 0, 0, 0 146 | for X_train_a, y_train_a in iterate.minibatches( 147 | X_train, y_train, batch_size, shuffle=True): 148 | dp_dict = dict_to_one( network.all_drop ) # disable noise layers 149 | feed_dict = {x: X_train_a, y_: y_train_a} 150 | feed_dict.update(dp_dict) 151 | if acc is not None: 152 | err, ac = sess.run([cost, acc], feed_dict=feed_dict) 153 | train_acc += ac 154 | else: 155 | err = sess.run(cost, feed_dict=feed_dict) 156 | train_loss += err; n_batch += 1 157 | print(" train loss: %f" % (train_loss/ n_batch)) 158 | if acc is not None: 159 | print(" train acc: %f" % (train_acc/ n_batch)) 160 | val_loss, val_acc, n_batch = 0, 0, 0 161 | for X_val_a, y_val_a in iterate.minibatches( 162 | X_val, y_val, batch_size, shuffle=True): 163 | dp_dict = dict_to_one( network.all_drop ) # disable noise layers 164 | feed_dict = {x: X_val_a, y_: y_val_a} 165 | feed_dict.update(dp_dict) 166 | if acc is not None: 167 | err, ac = sess.run([cost, acc], feed_dict=feed_dict) 168 | val_acc += ac 169 | else: 170 | err = sess.run(cost, feed_dict=feed_dict) 171 | val_loss += err; n_batch += 1 172 | print(" val loss: %f" % (val_loss/ n_batch)) 173 | if acc is not None: 174 | print(" val acc: %f" % (val_acc/ n_batch)) 175 | else: 176 | print("Epoch %d of %d took %fs, loss %f" % (epoch + 1, n_epoch, time.time() - start_time, loss_ep)) 177 | print("Total training time: %fs" % (time.time() - start_time_begin)) 178 | 179 | 180 | def test(sess, network, acc, X_test, y_test, x, y_, batch_size, cost=None): 181 | """ 182 | Test a given non time-series network by the given test data and metric. 
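    When ``batch_size`` is None, the whole test set is evaluated in a single
    ``sess.run`` call; otherwise the metric is averaged over minibatches,
    which avoids running out of memory on large test sets.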

    Parameters
    ----------
    sess : TensorFlow session
        sess = tf.InteractiveSession()
    network : a TensorLayer layer
        the network to be tested
    acc : the TensorFlow expression of accuracy (or other metric) or None
        if None, the metric is not displayed
    X_test : numpy array
        the input of test data
    y_test : numpy array
        the target of test data
    x : placeholder
        for inputs
    y_ : placeholder
        for targets
    batch_size : int or None
        batch size for testing; when the dataset is large, we should use minibatches for testing,
        and when the dataset is small, we can set it to None to evaluate everything at once.
    cost : the TensorFlow expression of cost or None
        if None, the cost is not displayed

    Examples
    --------
    >>> see tutorial_mnist_simple.py
    >>> tl.utils.test(sess, network, acc, X_test, y_test, x, y_, batch_size=None, cost=cost)
    """
    print('Start testing the network ...')
    if batch_size is None:
        dp_dict = dict_to_one( network.all_drop )
        feed_dict = {x: X_test, y_: y_test}
        feed_dict.update(dp_dict)
        if cost is not None:
            print("   test loss: %f" % sess.run(cost, feed_dict=feed_dict))
        print("   test acc: %f" % sess.run(acc, feed_dict=feed_dict))
        # print("   test acc: %f" % np.mean(y_test == sess.run(y_op,
        #                                   feed_dict=feed_dict)))
    else:
        test_loss, test_acc, n_batch = 0, 0, 0
        for X_test_a, y_test_a in iterate.minibatches(
                                    X_test, y_test, batch_size, shuffle=True):
            dp_dict = dict_to_one( network.all_drop )    # disable noise layers
            feed_dict = {x: X_test_a, y_: y_test_a}
            feed_dict.update(dp_dict)
            if cost is not None:
                err, ac = sess.run([cost, acc], feed_dict=feed_dict)
                test_loss += err
            else:
                ac = sess.run(acc, feed_dict=feed_dict)
            test_acc += ac; n_batch += 1
        if cost is not None:
            print("   test loss: %f" % (test_loss/ n_batch))
        print("   test acc: %f" % (test_acc/ n_batch))


def predict(sess, network, X, x, y_op):
    """
    Return the prediction results of a given non-time-series network.

    Parameters
    ----------
    sess : TensorFlow session
        sess = tf.InteractiveSession()
    network : a TensorLayer layer
        the network to predict with
    X : numpy array
        the input
    x : placeholder
        for inputs
    y_op : placeholder
        the argmax expression of softmax outputs

    Examples
    --------
    >>> see tutorial_mnist_simple.py
    >>> y = network.outputs
    >>> y_op = tf.argmax(tf.nn.softmax(y), 1)
    >>> print(tl.utils.predict(sess, network, X_test, x, y_op))
    """
    dp_dict = dict_to_one( network.all_drop )    # disable noise layers
    feed_dict = {x: X,}
    feed_dict.update(dp_dict)
    return sess.run(y_op, feed_dict=feed_dict)

## Evaluation
def evaluation(y_test=None, y_predict=None, n_classes=None):
    """
    Input the predicted results and the target results together with the number
    of classes; return the confusion matrix, the F1-score of each class, the
    accuracy and the macro F1-score.
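    A small worked case: with ``y_test = [0, 1, 1]``, ``y_predict = [0, 1, 0]``
    and ``n_classes = 2``, the confusion matrix is ``[[1, 0], [1, 1]]``, the
    accuracy is 2/3 and both per-class F1-scores are 2/3.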
274 | 275 | Parameters 276 | ---------- 277 | y_test : numpy.array or list 278 | target results 279 | y_predict : numpy.array or list 280 | predicted results 281 | n_classes : int 282 | number of classes 283 | 284 | Examples 285 | -------- 286 | >>> c_mat, f1, acc, f1_macro = evaluation(y_test, y_predict, n_classes) 287 | """ 288 | from sklearn.metrics import confusion_matrix, f1_score, accuracy_score 289 | c_mat = confusion_matrix(y_test, y_predict, labels = [x for x in range(n_classes)]) 290 | f1 = f1_score(y_test, y_predict, average = None, labels = [x for x in range(n_classes)]) 291 | f1_macro = f1_score(y_test, y_predict, average='macro') 292 | acc = accuracy_score(y_test, y_predict) 293 | print('confusion matrix: \n',c_mat) 294 | print('f1-score:',f1) 295 | print('f1-score(macro):',f1_macro) # same output with > f1_score(y_true, y_pred, average='macro') 296 | print('accuracy-score:', acc) 297 | return c_mat, f1, acc, f1_macro 298 | 299 | def dict_to_one(dp_dict={}): 300 | """ 301 | Input a dictionary, return a dictionary that all items are set to one, 302 | use for disable dropout, dropconnect layer and so on. 303 | 304 | Parameters 305 | ---------- 306 | dp_dict : dictionary 307 | keeping probabilities 308 | 309 | Examples 310 | -------- 311 | >>> dp_dict = dict_to_one( network.all_drop ) 312 | >>> dp_dict = dict_to_one( network.all_drop ) 313 | >>> feed_dict.update(dp_dict) 314 | """ 315 | return {x: 1 for x in dp_dict} 316 | 317 | def flatten_list(list_of_list=[[],[]]): 318 | """ 319 | Input a list of list, return a list that all items are in a list. 320 | 321 | Parameters 322 | ---------- 323 | list_of_list : a list of list 324 | 325 | Examples 326 | -------- 327 | >>> tl.utils.flatten_list([[1, 2, 3],[4, 5],[6]]) 328 | ... [1, 2, 3, 4, 5, 6] 329 | """ 330 | return sum(list_of_list, []) 331 | 332 | 333 | def class_balancing_oversample(X_train=None, y_train=None, printable=True): 334 | """Input the features and labels, return the features and labels after oversampling. 
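    For example, with labels ``[0, 0, 0, 1]`` the minority class ``1`` is
    stacked onto itself until it reaches the majority count of 3, so the
    balanced set contains 3 examples per class.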
335 | 336 | Parameters 337 | ---------- 338 | X_train : numpy.array 339 | Features, each row is an example 340 | y_train : numpy.array 341 | Labels 342 | 343 | Examples 344 | -------- 345 | >>> X_train, y_train = class_balancing_oversample(X_train, y_train, printable=True) 346 | """ 347 | # ======== Classes balancing 348 | if printable: 349 | print("Classes balancing for training examples...") 350 | from collections import Counter 351 | c = Counter(y_train) 352 | if printable: 353 | print('the occurrence number of each stage: %s' % c.most_common()) 354 | print('the least stage is Label %s have %s instances' % c.most_common()[-1]) 355 | print('the most stage is Label %s have %s instances' % c.most_common(1)[0]) 356 | most_num = c.most_common(1)[0][1] 357 | if printable: 358 | print('most num is %d, all classes tend to be this num' % most_num) 359 | 360 | locations = {} 361 | number = {} 362 | 363 | for lab, num in c.most_common(): # find the index from y_train 364 | number[lab] = num 365 | locations[lab] = np.where(np.array(y_train)==lab)[0] 366 | if printable: 367 | print('convert list(np.array) to dict format') 368 | X = {} # convert list to dict 369 | for lab, num in number.items(): 370 | X[lab] = X_train[locations[lab]] 371 | 372 | # oversampling 373 | if printable: 374 | print('start oversampling') 375 | for key in X: 376 | temp = X[key] 377 | while True: 378 | if len(X[key]) >= most_num: 379 | break 380 | X[key] = np.vstack((X[key], temp)) 381 | if printable: 382 | print('first features of label 0 >', len(X[0][0])) 383 | print('the occurrence num of each stage after oversampling') 384 | for key in X: 385 | print(key, len(X[key])) 386 | if printable: 387 | print('make each stage have same num of instances') 388 | for key in X: 389 | X[key] = X[key][0:most_num,:] 390 | print(key, len(X[key])) 391 | 392 | # convert dict to list 393 | if printable: 394 | print('convert from dict to list format') 395 | y_train = [] 396 | X_train = np.empty(shape=(0,len(X[0][0]))) 397 | for key in X: 398 | X_train = np.vstack( (X_train, X[key] ) ) 399 | y_train.extend([key for i in range(len(X[key]))]) 400 | # print(len(X_train), len(y_train)) 401 | c = Counter(y_train) 402 | if printable: 403 | print('the occurrence number of each stage after oversampling: %s' % c.most_common()) 404 | # ================ End of Classes balancing 405 | return X_train, y_train 406 | 407 | 408 | 409 | 410 | # 411 | # def class_balancing_sequence_4D(X_train, y_train, sequence_length, model='downsampling' ,printable=True): 412 | # ''' 输入、输出都是sequence format 413 | # oversampling or downsampling 414 | # ''' 415 | # n_features = X_train.shape[2] 416 | # # ======== Classes balancing for sequence 417 | # if printable: 418 | # print("Classes balancing for 4D sequence training examples...") 419 | # from collections import Counter 420 | # c = Counter(y_train) # Counter({2: 454, 4: 267, 3: 124, 1: 57, 0: 48}) 421 | # if printable: 422 | # print('the occurrence number of each stage: %s' % c.most_common()) 423 | # print('the least Label %s have %s instances' % c.most_common()[-1]) 424 | # print('the most Label %s have %s instances' % c.most_common(1)[0]) 425 | # # print(c.most_common()) # [(2, 454), (4, 267), (3, 124), (1, 57), (0, 48)] 426 | # most_num = c.most_common(1)[0][1] 427 | # less_num = c.most_common()[-1][1] 428 | # 429 | # locations = {} 430 | # number = {} 431 | # for lab, num in c.most_common(): 432 | # number[lab] = num 433 | # locations[lab] = np.where(np.array(y_train)==lab)[0] 434 | # # print(locations) 435 | # # 
print(number) 436 | # if printable: 437 | # print(' convert list to dict') 438 | # X = {} # convert list to dict 439 | # ### a sequence 440 | # for lab, _ in number.items(): 441 | # X[lab] = np.empty(shape=(0,1,n_features,1)) # 4D 442 | # for lab, _ in number.items(): 443 | # #X[lab] = X_train[locations[lab] 444 | # for l in locations[lab]: 445 | # X[lab] = np.vstack((X[lab], X_train[l*sequence_length : (l+1)*(sequence_length)])) 446 | # # X[lab] = X_train[locations[lab]*sequence_length : locations[lab]*(sequence_length+1)] # a sequence 447 | # # print(X) 448 | # 449 | # if model=='oversampling': 450 | # if printable: 451 | # print(' oversampling -- most num is %d, all classes tend to be this num\nshuffle applied' % most_num) 452 | # for key in X: 453 | # temp = X[key] 454 | # while True: 455 | # if len(X[key]) >= most_num * sequence_length: # sequence 456 | # break 457 | # X[key] = np.vstack((X[key], temp)) 458 | # # print(key, len(X[key])) 459 | # if printable: 460 | # print(' make each stage have same num of instances') 461 | # for key in X: 462 | # X[key] = X[key][0:most_num*sequence_length,:] # sequence 463 | # if printable: 464 | # print(key, len(X[key])) 465 | # elif model=='downsampling': 466 | # import random 467 | # if printable: 468 | # print(' downsampling -- less num is %d, all classes tend to be this num by randomly choice without replacement\nshuffle applied' % less_num) 469 | # for key in X: 470 | # # print(key, len(X[key]))#, len(X[key])/sequence_length) 471 | # s_idx = [ i for i in range(int(len(X[key])/sequence_length))] 472 | # s_idx = np.asarray(s_idx)*sequence_length # start index of sequnce in X[key] 473 | # # print('s_idx',s_idx) 474 | # r_idx = np.random.choice(s_idx, less_num, replace=False) # random choice less_num of s_idx 475 | # # print('r_idx',r_idx) 476 | # temp = X[key] 477 | # X[key] = np.empty(shape=(0,1,n_features,1)) # 4D 478 | # for idx in r_idx: 479 | # X[key] = np.vstack((X[key], temp[idx:idx+sequence_length])) 480 | # # print(key, X[key]) 481 | # # np.random.choice(l, len(l), replace=False) 482 | # else: 483 | # raise Exception(' model should be oversampling or downsampling') 484 | # 485 | # # convert dict to list 486 | # if printable: 487 | # print(' convert dict to list') 488 | # y_train = [] 489 | # # X_train = np.empty(shape=(0,len(X[0][0]))) 490 | # # X_train = np.empty(shape=(0,len(X[1][0]))) # 2D 491 | # X_train = np.empty(shape=(0,1,n_features,1)) # 4D 492 | # l_key = list(X.keys()) # shuffle 493 | # random.shuffle(l_key) # shuffle 494 | # # for key in X: # no shuffle 495 | # for key in l_key: # shuffle 496 | # X_train = np.vstack( (X_train, X[key] ) ) 497 | # # print(len(X[key])) 498 | # y_train.extend([key for i in range(int(len(X[key])/sequence_length))]) 499 | # # print(X_train,y_train, type(X_train), type(y_train)) 500 | # # ================ End of Classes balancing for sequence 501 | # # print(X_train.shape, len(y_train)) 502 | # return X_train, np.asarray(y_train) 503 | -------------------------------------------------------------------------------- /tensorlayer/visualize.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- coding: utf8 -*- 3 | 4 | 5 | 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import os 9 | 10 | 11 | 12 | def W(W=None, second=10, saveable=True, shape=[28,28], name='mnist', fig_idx=2396512): 13 | """Visualize every columns of the weight matrix to a group of Greyscale img. 
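    Each column ``W[:, i]`` is L2-normalised and reshaped to ``shape`` before
    plotting, so e.g. a 784 x 25 weight matrix is shown as a 5 x 5 grid of
    28 x 28 tiles.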

    Parameters
    ----------
    W : numpy.array
        The weight matrix
    second : int
        The display second(s) for the image(s), if saveable is False.
    saveable : boolean
        Save or plot the figure.
    shape : a list with 2 int
        The shape of the feature image; for MNIST it is [28, 28].
    name : a string
        A name to save the image, if saveable is True.
    fig_idx : int
        matplotlib figure index.

    Examples
    --------
    >>> tl.visualize.W(network.all_params[0].eval(), second=10, saveable=True, name='weight_of_1st_layer', fig_idx=2012)
    """
    if saveable is False:
        plt.ion()
    fig = plt.figure(fig_idx)       # show all feature images
    size = W.shape[0]
    n_units = W.shape[1]

    num_r = int(np.sqrt(n_units))   # number of units shown per row; e.g. 25 hidden units -> 5 per row
    num_c = int(np.ceil(n_units/num_r))
    count = int(1)
    for row in range(1, num_r+1):
        for col in range(1, num_c+1):
            if count > n_units:
                break
            a = fig.add_subplot(num_r, num_c, count)
            # ------------------------------------------------------------
            # plt.imshow(np.reshape(W[:,count-1],(28,28)), cmap='gray')
            # ------------------------------------------------------------
            feature = W[:,count-1] / np.sqrt( (W[:,count-1]**2).sum())
            # feature[feature<0.0001] = 0   # value threshold
            # if count == 1 or count == 2:
            #     print(np.mean(feature))
            # if np.std(feature) < 0.03:    # condition threshold
            #     feature = np.zeros_like(feature)
            # if np.mean(feature) < -0.015: # condition threshold
            #     feature = np.zeros_like(feature)
            plt.imshow(np.reshape(feature ,(shape[0],shape[1])),
                       cmap='gray', interpolation="nearest")#, vmin=np.min(feature), vmax=np.max(feature))
            # plt.title(name)
            # ------------------------------------------------------------
            # plt.imshow(np.reshape(W[:,count-1] ,(np.sqrt(size),np.sqrt(size))), cmap='gray', interpolation="nearest")
            plt.gca().xaxis.set_major_locator(plt.NullLocator())    # disable ticks
            plt.gca().yaxis.set_major_locator(plt.NullLocator())
            count = count + 1
    if saveable:
        plt.savefig(name+'.pdf',format='pdf')
    else:
        plt.draw()
        plt.pause(second)

def frame(I=None, second=5, saveable=True, name='frame', cmap=None, fig_idx=12836):
    """Display a frame (image). Make sure OpenAI Gym's render() is disabled before using it.

    Parameters
    ----------
    I : numpy.array
        The image
    second : int
        The display second(s) for the image(s), if saveable is False.
    saveable : boolean
        Save or plot the figure.
    name : a string
        A name to save the image, if saveable is True.
    cmap : None or string
        'gray' for greyscale, None for default, etc.
    fig_idx : int
        matplotlib figure index.
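    Notes
    -----
    If the last dimension of ``I`` has size 1, e.g. shape (10, 10, 1), it is
    squeezed to 2-D so that matplotlib can display it as a greyscale image.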
90 | 91 | Examples 92 | -------- 93 | >>> env = gym.make("Pong-v0") 94 | >>> observation = env.reset() 95 | >>> tl.visualize.frame(observation) 96 | """ 97 | if saveable is False: 98 | plt.ion() 99 | fig = plt.figure(fig_idx) # show all feature images 100 | 101 | if len(I.shape) and I.shape[-1]==1: # (10,10,1) --> (10,10) 102 | I = I[:,:,0] 103 | 104 | plt.imshow(I, cmap) 105 | plt.title(name) 106 | # plt.gca().xaxis.set_major_locator(plt.NullLocator()) # distable tick 107 | # plt.gca().yaxis.set_major_locator(plt.NullLocator()) 108 | 109 | if saveable: 110 | plt.savefig(name+'.pdf',format='pdf') 111 | else: 112 | plt.draw() 113 | plt.pause(second) 114 | 115 | def CNN2d(CNN=None, second=10, saveable=True, name='cnn', fig_idx=3119362): 116 | """Display a group of RGB or Greyscale CNN masks. 117 | 118 | Parameters 119 | ---------- 120 | CNN : numpy.array 121 | The image. e.g: 64 5x5 RGB images can be (5, 5, 3, 64). 122 | second : int 123 | The display second(s) for the image(s), if saveable is False. 124 | saveable : boolean 125 | Save or plot the figure. 126 | name : a string 127 | A name to save the image, if saveable is True. 128 | fig_idx : int 129 | matplotlib figure index. 130 | 131 | Examples 132 | -------- 133 | >>> tl.visualize.CNN2d(network.all_params[0].eval(), second=10, saveable=True, name='cnn1_mnist', fig_idx=2012) 134 | """ 135 | # print(CNN.shape) # (5, 5, 3, 64) 136 | # exit() 137 | n_mask = CNN.shape[3] 138 | n_row = CNN.shape[0] 139 | n_col = CNN.shape[1] 140 | n_color = CNN.shape[2] 141 | row = int(np.sqrt(n_mask)) 142 | col = int(np.ceil(n_mask/row)) 143 | plt.ion() # active mode 144 | fig = plt.figure(fig_idx) 145 | count = 1 146 | for ir in range(1, row+1): 147 | for ic in range(1, col+1): 148 | if count > n_mask: 149 | break 150 | a = fig.add_subplot(col, row, count) 151 | # print(CNN[:,:,:,count-1].shape, n_row, n_col) # (5, 1, 32) 5 5 152 | # exit() 153 | # plt.imshow( 154 | # np.reshape(CNN[count-1,:,:,:], (n_row, n_col)), 155 | # cmap='gray', interpolation="nearest") # theano 156 | if n_color == 1: 157 | plt.imshow( 158 | np.reshape(CNN[:,:,:,count-1], (n_row, n_col)), 159 | cmap='gray', interpolation="nearest") 160 | elif n_color == 3: 161 | plt.imshow( 162 | np.reshape(CNN[:,:,:,count-1], (n_row, n_col, n_color)), 163 | cmap='gray', interpolation="nearest") 164 | else: 165 | raise Exception("Unknown n_color") 166 | plt.gca().xaxis.set_major_locator(plt.NullLocator()) # distable tick 167 | plt.gca().yaxis.set_major_locator(plt.NullLocator()) 168 | count = count + 1 169 | if saveable: 170 | plt.savefig(name+'.pdf',format='pdf') 171 | else: 172 | plt.draw() 173 | plt.pause(second) 174 | 175 | 176 | def images2d(images=None, second=10, saveable=True, name='images', dtype=None, 177 | fig_idx=3119362): 178 | """Display a group of RGB or Greyscale images. 179 | 180 | Parameters 181 | ---------- 182 | images : numpy.array 183 | The images. 184 | second : int 185 | The display second(s) for the image(s), if saveable is False. 186 | saveable : boolean 187 | Save or plot the figure. 188 | name : a string 189 | A name to save the image, if saveable is True. 190 | dtype : None or numpy data type 191 | The data type for displaying the images. 192 | fig_idx : int 193 | matplotlib figure index. 
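    Notes
    -----
    The images are laid out on a roughly square grid: ``row = int(np.sqrt(n_mask))``
    and ``col = int(np.ceil(n_mask / row))``, so 100 images give a 10 x 10 grid.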
194 | 195 | Examples 196 | -------- 197 | >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False) 198 | >>> tl.visualize.images2d(X_train[0:100,:,:,:], second=10, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212) 199 | """ 200 | # print(images.shape) # (50000, 32, 32, 3) 201 | # exit() 202 | if dtype: 203 | images = np.asarray(images, dtype=dtype) 204 | n_mask = images.shape[0] 205 | n_row = images.shape[1] 206 | n_col = images.shape[2] 207 | n_color = images.shape[3] 208 | row = int(np.sqrt(n_mask)) 209 | col = int(np.ceil(n_mask/row)) 210 | plt.ion() # active mode 211 | fig = plt.figure(fig_idx) 212 | count = 1 213 | for ir in range(1, row+1): 214 | for ic in range(1, col+1): 215 | if count > n_mask: 216 | break 217 | a = fig.add_subplot(col, row, count) 218 | # print(images[:,:,:,count-1].shape, n_row, n_col) # (5, 1, 32) 5 5 219 | # plt.imshow( 220 | # np.reshape(images[count-1,:,:,:], (n_row, n_col)), 221 | # cmap='gray', interpolation="nearest") # theano 222 | if n_color == 1: 223 | plt.imshow( 224 | np.reshape(images[count-1,:,:], (n_row, n_col)), 225 | cmap='gray', interpolation="nearest") 226 | # plt.title(name) 227 | elif n_color == 3: 228 | plt.imshow(images[count-1,:,:], 229 | cmap='gray', interpolation="nearest") 230 | # plt.title(name) 231 | else: 232 | raise Exception("Unknown n_color") 233 | plt.gca().xaxis.set_major_locator(plt.NullLocator()) # distable tick 234 | plt.gca().yaxis.set_major_locator(plt.NullLocator()) 235 | count = count + 1 236 | if saveable: 237 | plt.savefig(name+'.pdf',format='pdf') 238 | else: 239 | plt.draw() 240 | plt.pause(second) 241 | 242 | def tsne_embedding(embeddings, reverse_dictionary, plot_only=500, 243 | second=5, saveable=False, name='tsne', fig_idx=9862): 244 | """Visualize the embeddings by using t-SNE. 245 | 246 | Parameters 247 | ---------- 248 | embeddings : a matrix 249 | The images. 250 | reverse_dictionary : a dictionary 251 | id_to_word, mapping id to unique word. 252 | plot_only : int 253 | The number of examples to plot, choice the most common words. 254 | second : int 255 | The display second(s) for the image(s), if saveable is False. 256 | saveable : boolean 257 | Save or plot the figure. 258 | name : a string 259 | A name to save the image, if saveable is True. 260 | fig_idx : int 261 | matplotlib figure index. 262 | 263 | Examples 264 | -------- 265 | >>> see 'tutorial_word2vec_basic.py' 266 | >>> final_embeddings = normalized_embeddings.eval() 267 | >>> tl.visualize.tsne_embedding(final_embeddings, labels, reverse_dictionary, 268 | ... 
plot_only=500, second=5, saveable=False, name='tsne') 269 | """ 270 | def plot_with_labels(low_dim_embs, labels, figsize=(18, 18), second=5, 271 | saveable=True, name='tsne', fig_idx=9862): 272 | assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings" 273 | if saveable is False: 274 | plt.ion() 275 | plt.figure(fig_idx) 276 | plt.figure(figsize=figsize) #in inches 277 | for i, label in enumerate(labels): 278 | x, y = low_dim_embs[i,:] 279 | plt.scatter(x, y) 280 | plt.annotate(label, 281 | xy=(x, y), 282 | xytext=(5, 2), 283 | textcoords='offset points', 284 | ha='right', 285 | va='bottom') 286 | if saveable: 287 | plt.savefig(name+'.pdf',format='pdf') 288 | else: 289 | plt.draw() 290 | plt.pause(second) 291 | 292 | try: 293 | from sklearn.manifold import TSNE 294 | import matplotlib.pyplot as plt 295 | from six.moves import xrange 296 | 297 | tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000) 298 | # plot_only = 500 299 | low_dim_embs = tsne.fit_transform(embeddings[:plot_only,:]) 300 | labels = [reverse_dictionary[i] for i in xrange(plot_only)] 301 | plot_with_labels(low_dim_embs, labels, second=second, saveable=saveable, \ 302 | name=name, fig_idx=fig_idx) 303 | except ImportError: 304 | print("Please install sklearn and matplotlib to visualize embeddings.") 305 | 306 | 307 | # 308 | -------------------------------------------------------------------------------- /tensorlayer1.2.2/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Deep learning and Reinforcement learning library for Researchers and Engineers 3 | """ 4 | # from __future__ import absolute_import 5 | 6 | 7 | try: 8 | install_instr = "Please make sure you install a recent enough version of TensorFlow." 9 | import tensorflow 10 | except ImportError: 11 | raise ImportError("__init__.py : Could not import TensorFlow." + install_instr) 12 | 13 | from . import activation 14 | from . import cost 15 | from . import files 16 | # from . import init 17 | from . import iterate 18 | from . import layers 19 | from . import ops 20 | from . import utils 21 | from . import visualize 22 | from . import prepro # was preprocesse 23 | from . import nlp 24 | from . import rein 25 | 26 | 27 | __version__ = "1.2.3" 28 | -------------------------------------------------------------------------------- /tensorlayer1.2.2/activation.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- coding: utf8 -*- 3 | 4 | 5 | 6 | import tensorflow as tf 7 | 8 | def identity(x, name=None): 9 | """The identity activation function 10 | 11 | Parameters 12 | ---------- 13 | x : a tensor input 14 | input(s) 15 | 16 | 17 | Returns 18 | -------- 19 | A `Tensor` with the same type as `x`. 20 | """ 21 | return x 22 | 23 | # Shortcut 24 | linear = identity 25 | 26 | def ramp(x=None, v_min=0, v_max=1, name=None): 27 | """The ramp activation function. 28 | 29 | Parameters 30 | ---------- 31 | x : a tensor input 32 | input(s) 33 | v_min : float 34 | if input(s) smaller than v_min, change inputs to v_min 35 | v_max : float 36 | if input(s) greater than v_max, change inputs to v_max 37 | name : a string or None 38 | An optional name to attach to this activation function. 39 | 40 | 41 | Returns 42 | -------- 43 | A `Tensor` with the same type as `x`. 
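    Examples
    --------
    A minimal sketch; values are clipped element-wise into [v_min, v_max]:

    >>> x = tf.constant([-1.0, 0.5, 2.0])
    >>> y = tl.act.ramp(x, v_min=0, v_max=1)   # evaluates to [0.0, 0.5, 1.0]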
    """
    return tf.clip_by_value(x, clip_value_min=v_min, clip_value_max=v_max, name=name)

def leaky_relu(x=None, alpha=0.1, name="LeakyReLU"):
    """The LeakyReLU.

    Modified version of ReLU, introducing a nonzero gradient for negative
    input.

    Parameters
    ----------
    x : A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`,
        `int16`, or `int8`.
    alpha : `float`. slope.
    name : a string or None
        An optional name to attach to this activation function.

    References
    ------------
    - `Rectifier Nonlinearities Improve Neural Network Acoustic Models, Maas et al. (2013) `_
    """
    with tf.name_scope(name) as scope:
        # x = tf.nn.relu(x)
        # m_x = tf.nn.relu(-x)
        # x -= alpha * m_x
        x = tf.maximum(x, alpha * x)
    return x

# Shortcut
lrelu = leaky_relu


## Alternatively we can use tl.layers.PReluLayer()
def prelu(x, channel_shared=False, W_init=tf.constant_initializer(value=0.0), W_init_args={}, restore=True, name="PReLU"):
    """ Parametric Rectified Linear Unit.

    Parameters
    ----------
    x : A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`,
        `int16`, or `int8`.
    channel_shared : `bool`. Single weight is shared by all channels
    W_init: weights initializer, default zero constant.
        The initializer for initializing the alphas.
    restore : `bool`. Restore or not alphas
    name : A name for this activation op (optional).

    Returns
    -------
    A `Tensor` with the same type as `x`.

    References
    -----------
    - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification `_
    """
    print(' prelu: untested !!!')
    if channel_shared:
        w_shape = (1,)
    else:
        w_shape = int(x.get_shape()[-1])    # one slope per channel in the last dimension

    with tf.name_scope(name) as scope:
        # create the trainable slopes using the given initializer
        alphas = tf.get_variable(name='alphas', shape=w_shape, initializer=W_init, **W_init_args )
        x = tf.nn.relu(x) + tf.mul(alphas, (x - tf.abs(x))) * 0.5

    return x
--------------------------------------------------------------------------------
/tensorlayer1.2.2/cost.py:
--------------------------------------------------------------------------------
#! /usr/bin/python
# -*- coding: utf8 -*-



import tensorflow as tf
import numbers
from tensorflow.python.framework import ops
from tensorflow.python.ops import standard_ops

## Cost Functions
def cross_entropy(output, target, name="cross_entropy_loss"):
    """Returns the TensorFlow expression of the cross-entropy of two distributions; implements
    softmax internally.

    Parameters
    ----------
    output : Tensorflow variable
        The network output (logits) with shape: [batch_size, n_feature].
    target : Tensorflow variable
        The target labels with shape: [batch_size], as integer class indices.

    Examples
    --------
    >>> ce = tl.cost.cross_entropy(y_logits, y_target_logits)

    References
    -----------
    - About cross-entropy: `wiki `_.\n
    - The code is borrowed from: `here `_.
31 | """ 32 | with tf.name_scope(name): 33 | # net_output_tf = output 34 | # target_tf = target 35 | # cross_entropy = tf.add(tf.mul(tf.log(net_output_tf, name=None),target_tf), 36 | # tf.mul(tf.log(1 - net_output_tf), (1 - target_tf))) 37 | # return -1 * tf.reduce_mean(tf.reduce_sum(cross_entropy, 1), name='cross_entropy_mean') 38 | return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(output, target)) 39 | 40 | # Undocumented 41 | def binary_cross_entropy(preds, targets, name=None): 42 | """Computes binary cross entropy given `preds`. 43 | 44 | For brevity, let `x = `, `z = targets`. The logistic loss is 45 | 46 | loss(x, z) = - sum_i (x[i] * log(z[i]) + (1 - x[i]) * log(1 - z[i])) 47 | 48 | Parameters 49 | ---------- 50 | preds : A `Tensor` of type `float32` or `float64`. 51 | targets : A `Tensor` of the same type and shape as `preds`. 52 | """ 53 | print("Undocumented") 54 | from tensorflow.python.framework import ops 55 | eps = 1e-12 56 | with ops.op_scope([preds, targets], name, "bce_loss") as name: 57 | preds = ops.convert_to_tensor(preds, name="preds") 58 | targets = ops.convert_to_tensor(targets, name="targets") 59 | return tf.reduce_mean(-(targets * tf.log(preds + eps) + 60 | (1. - targets) * tf.log(1. - preds + eps))) 61 | 62 | 63 | def mean_squared_error(output, target): 64 | """Return the TensorFlow expression of mean-squre-error of two distributions. 65 | 66 | Parameters 67 | ---------- 68 | output : tensorflow variable 69 | A distribution with shape: [batch_size, n_feature]. 70 | target : tensorflow variable 71 | A distribution with shape: [batch_size, n_feature]. 72 | """ 73 | with tf.name_scope("mean_squared_error_loss"): 74 | mse = tf.reduce_sum(tf.squared_difference(output, target), reduction_indices = 1) 75 | return tf.reduce_mean(mse) 76 | 77 | def cross_entropy_seq(logits, target_seqs, batch_size=1, num_steps=None): 78 | """Returns the expression of cross-entropy of two sequences, implement 79 | softmax internally. Normally be used for Fixed Length RNN outputs. 80 | 81 | Parameters 82 | ---------- 83 | logits : Tensorflow variable 84 | 2D tensor, ``network.outputs``, [batch_size*n_steps (n_examples), number of output units] 85 | target_seqs : Tensorflow variable 86 | target : 2D tensor [batch_size, n_steps], if the number of step is dynamic, please use ``cross_entropy_seq_with_mask`` instead. 87 | batch_size : a int, default is 1 88 | RNN batch_size, number of concurrent processes, divide the loss by batch_size. 89 | num_steps : a int 90 | sequence length 91 | 92 | Examples 93 | -------- 94 | >>> see PTB tutorial for more details 95 | >>> input_data = tf.placeholder(tf.int32, [batch_size, num_steps]) 96 | >>> targets = tf.placeholder(tf.int32, [batch_size, num_steps]) 97 | >>> cost = tf.cost.cross_entropy_seq(network.outputs, targets, batch_size, num_steps) 98 | """ 99 | loss = tf.nn.seq2seq.sequence_loss_by_example( 100 | [logits], 101 | [tf.reshape(target_seqs, [-1])], 102 | [tf.ones([batch_size * num_steps])]) 103 | cost = tf.reduce_sum(loss) / batch_size 104 | return cost 105 | 106 | 107 | def cross_entropy_seq_with_mask(logits, target_seqs, input_mask, return_details=False): 108 | """Returns the expression of cross-entropy of two sequences, implement 109 | softmax internally. Normally be used for Dynamic RNN outputs. 110 | 111 | Parameters 112 | ----------- 113 | logits : network identity outputs 114 | 2D tensor, ``network.outputs``, [batch_size, number of output units]. 115 | target_seqs : int of tensor, like word ID. 116 | [batch_size, ?] 
105 |
106 |
107 | def cross_entropy_seq_with_mask(logits, target_seqs, input_mask, return_details=False):
108 | """Returns the expression of cross-entropy of two sequences; softmax is
109 | implemented internally. Normally used for dynamic RNN outputs.
110 |
111 | Parameters
112 | -----------
113 | logits : network identity outputs
114 | 2D tensor, ``network.outputs``, [batch_size, number of output units].
115 | target_seqs : int tensor, e.g. word IDs.
116 | [batch_size, ?]
117 | input_mask : the mask to compute loss
118 | The same size as target_seqs, normally 0 and 1.
119 | return_details : boolean
120 | If False (default), only returns the loss.
121 |
122 | If True, returns the loss, losses, weights and targets (reshaped to one vector).
123 |
124 | Examples
125 | --------
126 | - see Image Captioning Example.
127 | """
128 | print(" cross_entropy_seq_with_mask : Undocumented")
129 | targets = tf.reshape(target_seqs, [-1]) # to one vector
130 | weights = tf.to_float(tf.reshape(input_mask, [-1])) # to one vector like targets
131 | losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, targets)
132 | loss = tf.div(tf.reduce_sum(tf.mul(losses, weights)), # multiply element-wise by the mask, sum, then normalize by the mask sum
133 | tf.reduce_sum(weights),
134 | name="seq_loss_with_mask")
135 | if return_details:
136 | return loss, losses, weights, targets
137 | else:
138 | return loss
139 |
140 | ## Regularization Functions
141 | def li_regularizer(scale):
142 | """li regularization removes the neurons of the previous layer; `i` represents `inputs`.\n
143 | Returns a function that can be used to apply group li regularization to weights.\n
144 | The implementation follows `TensorFlow contrib `_.
145 |
146 |
147 |
148 | Parameters
149 | ----------
150 | scale : float
151 | A scalar multiplier `Tensor`. 0.0 disables the regularizer.
152 |
153 | Returns
154 | --------
155 | A function with signature `li(weights, name=None)` that applies li regularization.
156 |
157 | Raises
158 | ------
159 | ValueError : if scale is outside of the range [0.0, 1.0] or if scale is not a float.
160 | """
161 | import numbers
162 | from tensorflow.python.framework import ops
163 | from tensorflow.python.ops import standard_ops
164 | # from tensorflow.python.platform import tf_logging as logging
165 |
166 | if isinstance(scale, numbers.Integral):
167 | raise ValueError('scale cannot be an integer: %s' % scale)
168 | if isinstance(scale, numbers.Real):
169 | if scale < 0.:
170 | raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
171 | scale)
172 | if scale >= 1.:
173 | raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
174 | scale)
175 | if scale == 0.:
176 | logging.info('Scale of 0 disables regularizer.')
177 | return lambda _, name=None: None
178 |
179 | def li(weights, name=None):
180 | """Applies li regularization to weights."""
181 | with ops.op_scope([weights], name, 'li_regularizer') as scope:
182 | my_scale = ops.convert_to_tensor(scale,
183 | dtype=weights.dtype.base_dtype,
184 | name='scale')
185 | return standard_ops.mul(
186 | my_scale,
187 | standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 1))),
188 | name=scope)
189 | return li
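# [Editor's note] A plain-NumPy sketch, not library code, of the group penalty
# li_regularizer above computes: the sum over rows (input units) of the L2 norm
# of each row, scaled by `scale`. lo_regularizer below is the same with rows
# and columns swapped.
import numpy as np
W = np.array([[3.0, 4.0],
              [0.0, 0.0]])   # second input unit is already "switched off"
scale = 0.01
li_penalty = scale * np.sum(np.sqrt(np.sum(W ** 2, axis=1)))  # rows -> 0.01 * (5 + 0)
lo_penalty = scale * np.sum(np.sqrt(np.sum(W ** 2, axis=0)))  # cols -> 0.01 * (3 + 4)
print(li_penalty, lo_penalty)  # 0.05 0.07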
190 |
191 | def lo_regularizer(scale):
192 | """lo regularization removes the neurons of the current layer; `o` represents `outputs`.\n
193 | Returns a function that can be used to apply group lo regularization to weights.\n
194 | The implementation follows `TensorFlow contrib `_.
195 |
196 | Parameters
197 | ----------
198 | scale : float
199 | A scalar multiplier `Tensor`. 0.0 disables the regularizer.
200 |
201 | Returns
202 | -------
203 | A function with signature `lo(weights, name=None)` that applies lo regularization.
204 |
205 | Raises
206 | ------
207 | ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
208 | """
209 | import numbers
210 | from tensorflow.python.framework import ops
211 | from tensorflow.python.ops import standard_ops
212 | # from tensorflow.python.platform import tf_logging as logging
213 |
214 | if isinstance(scale, numbers.Integral):
215 | raise ValueError('scale cannot be an integer: %s' % scale)
216 | if isinstance(scale, numbers.Real):
217 | if scale < 0.:
218 | raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
219 | scale)
220 | if scale >= 1.:
221 | raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
222 | scale)
223 | if scale == 0.:
224 | logging.info('Scale of 0 disables regularizer.')
225 | return lambda _, name=None: None
226 |
227 | def lo(weights, name=None):
228 | """Applies group column regularization to weights."""
229 | with ops.op_scope([weights], name, 'lo_regularizer') as scope:
230 | my_scale = ops.convert_to_tensor(scale,
231 | dtype=weights.dtype.base_dtype,
232 | name='scale')
233 | return standard_ops.mul(
234 | my_scale,
235 | standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 0))),
236 | name=scope)
237 | return lo
238 |
239 | def maxnorm_regularizer(scale=1.0):
240 | """Max-norm regularization returns a function that can be used
241 | to apply max-norm regularization to weights.
242 | About max-norm: `wiki `_.\n
243 | The implementation follows `TensorFlow contrib `_.
244 |
245 | Parameters
246 | ----------
247 | scale : float
248 | A scalar multiplier `Tensor`. 0.0 disables the regularizer.
249 |
250 | Returns
251 | ---------
252 | A function with signature `mn(weights, name=None)` that applies max-norm regularization.
253 |
254 | Raises
255 | --------
256 | ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
257 | """
258 | import numbers
259 | from tensorflow.python.framework import ops
260 | from tensorflow.python.ops import standard_ops
261 |
262 | if isinstance(scale, numbers.Integral):
263 | raise ValueError('scale cannot be an integer: %s' % scale)
264 | if isinstance(scale, numbers.Real):
265 | if scale < 0.:
266 | raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
267 | scale)
268 | # if scale >= 1.:
269 | # raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
270 | # scale)
271 | if scale == 0.:
272 | logging.info('Scale of 0 disables regularizer.')
273 | return lambda _, name=None: None
274 |
275 | def mn(weights, name=None):
276 | """Applies max-norm regularization to weights."""
277 | with ops.op_scope([weights], name, 'maxnorm_regularizer') as scope:
278 | my_scale = ops.convert_to_tensor(scale,
279 | dtype=weights.dtype.base_dtype,
280 | name='scale')
281 | return standard_ops.mul(my_scale, standard_ops.reduce_max(standard_ops.abs(weights)), name=scope)
282 | return mn
283 |
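# [Editor's note] A standalone NumPy sketch, not library code, of the three
# max-norm penalties in this file: maxnorm_regularizer above takes the single
# largest |w|, while maxnorm_o_regularizer / maxnorm_i_regularizer below take
# the largest |w| per column / per row and sum them.
import numpy as np
W = np.array([[1.0, -5.0],
              [2.0,  3.0]])
mn   = np.max(np.abs(W))                  # 5.0  (largest magnitude overall)
mn_o = np.sum(np.max(np.abs(W), axis=0))  # 2 + 5 = 7.0  (per column)
mn_i = np.sum(np.max(np.abs(W), axis=1))  # 5 + 3 = 8.0  (per row)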
284 | def maxnorm_o_regularizer(scale):
285 | """Max-norm output regularization removes the neurons of the current layer.\n
286 | Returns a function that can be used to apply max-norm regularization to each column of the weight matrix.\n
287 | The implementation follows `TensorFlow contrib `_.
288 |
289 | Parameters
290 | ----------
291 | scale : float
292 | A scalar multiplier `Tensor`. 0.0 disables the regularizer.
293 |
294 | Returns
295 | ---------
296 | A function with signature `mn_o(weights, name=None)` that applies column-wise max-norm regularization.
297 |
298 | Raises
299 | ---------
300 | ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
301 | """
302 | import numbers
303 | from tensorflow.python.framework import ops
304 | from tensorflow.python.ops import standard_ops
305 |
306 | if isinstance(scale, numbers.Integral):
307 | raise ValueError('scale cannot be an integer: %s' % scale)
308 | if isinstance(scale, numbers.Real):
309 | if scale < 0.:
310 | raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
311 | scale)
312 | # if scale >= 1.:
313 | # raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
314 | # scale)
315 | if scale == 0.:
316 | logging.info('Scale of 0 disables regularizer.')
317 | return lambda _, name=None: None
318 |
319 | def mn_o(weights, name=None):
320 | """Applies max-norm regularization to weights."""
321 | with ops.op_scope([weights], name, 'maxnorm_o_regularizer') as scope:
322 | my_scale = ops.convert_to_tensor(scale,
323 | dtype=weights.dtype.base_dtype,
324 | name='scale')
325 | return standard_ops.mul(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 0)), name=scope)
326 | return mn_o
327 |
328 | def maxnorm_i_regularizer(scale):
329 | """Max-norm input regularization removes the neurons of the previous layer.\n
330 | Returns a function that can be used to apply max-norm regularization to each row of the weight matrix.\n
331 | The implementation follows `TensorFlow contrib `_.
332 |
333 | Parameters
334 | ----------
335 | scale : float
336 | A scalar multiplier `Tensor`. 0.0 disables the regularizer.
337 |
338 | Returns
339 | ---------
340 | A function with signature `mn_i(weights, name=None)` that applies row-wise max-norm regularization.
341 |
342 | Raises
343 | ---------
344 | ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
345 | """
346 | import numbers
347 | from tensorflow.python.framework import ops
348 | from tensorflow.python.ops import standard_ops
349 |
350 | if isinstance(scale, numbers.Integral):
351 | raise ValueError('scale cannot be an integer: %s' % scale)
352 | if isinstance(scale, numbers.Real):
353 | if scale < 0.:
354 | raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
355 | scale)
356 | # if scale >= 1.:
357 | # raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
358 | # scale)
359 | if scale == 0.:
360 | logging.info('Scale of 0 disables regularizer.')
361 | return lambda _, name=None: None
362 |
363 | def mn_i(weights, name=None):
364 | """Applies max-norm regularization to weights."""
365 | with ops.op_scope([weights], name, 'maxnorm_i_regularizer') as scope: # was 'maxnorm_o_regularizer', a copy-paste bug
366 | my_scale = ops.convert_to_tensor(scale,
367 | dtype=weights.dtype.base_dtype,
368 | name='scale')
369 | return standard_ops.mul(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 1)), name=scope)
370 | return mn_i
371 |
372 |
373 |
374 |
375 |
376 | #
377 |
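# [Editor's note] A hedged usage sketch for the regularizer factories above; it
# is not part of the file. `logits`, `labels` and `W_demo` are illustrative
# stand-ins, and the pattern simply adds the returned penalty op to an existing
# cost expression before optimizing.
import tensorflow as tf
logits = tf.placeholder(tf.float32, [None, 10])
labels = tf.placeholder(tf.int32, [None])
W = tf.Variable(tf.ones([784, 10]), name='W_demo')
penalty = maxnorm_regularizer(scale=1e-4)(W)          # a scalar penalty Tensor
total_cost = cross_entropy(logits, labels) + penalty  # minimize this instead of the raw cost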
-------------------------------------------------------------------------------- /tensorlayer1.2.2/iterate.py: --------------------------------------------------------------------------------
1 | #! /usr/bin/python
2 | # -*- coding: utf8 -*-
3 |
4 |
5 |
6 | import numpy as np
7 | from six.moves import xrange
8 |
9 | def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False):
10 | """
11 | Generate a generator that takes a group of examples in numpy.array and
12 | their labels, and returns the examples and labels batch by batch with the given batch size.
13 |
14 | Parameters
15 | ----------
16 | inputs : numpy.array
17 | (X) The input features, every row is an example.
18 | targets : numpy.array
19 | (y) The labels of inputs, every row is an example.
20 | batch_size : int
21 | The batch size.
22 | shuffle : boolean
23 | Whether to shuffle the dataset before returning the batches.
24 |
25 | Examples
26 | --------
27 | >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']])
28 | >>> y = np.asarray([0,1,2,3,4,5])
29 | >>> for batch in tl.iterate.minibatches(inputs=X, targets=y, batch_size=2, shuffle=False):
30 | >>> print(batch)
31 | ... (array([['a', 'a'],
32 | ... ['b', 'b']],
33 | ... dtype='<U1'), array([0, 1]))
34 | ... (array([['c', 'c'],
35 | ... ['d', 'd']],
36 | ... dtype='<U1'), array([2, 3]))
37 | ... (array([['e', 'e'],
38 | ... ['f', 'f']],
39 | ... dtype='<U1'), array([4, 5]))
40 | """
41 | assert len(inputs) == len(targets)
42 | if shuffle:
43 | indices = np.arange(len(inputs))
44 | np.random.shuffle(indices)
45 | for start_idx in range(0, len(inputs) - batch_size + 1, batch_size):
46 | if shuffle:
47 | excerpt = indices[start_idx:start_idx + batch_size]
48 | else:
49 | excerpt = slice(start_idx, start_idx + batch_size)
50 | yield inputs[excerpt], targets[excerpt]
51 |
52 | def seq_minibatches(inputs, targets, batch_size, seq_length, stride=1):
53 | """
54 | Generate a generator that returns batches of sequence inputs and targets.
55 | If batch_size = 100 and seq_length = 5, each yielded batch has 500 rows (examples).
56 |
57 | Examples
58 | --------
59 | >>> Synced sequence input and output.
60 | >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']])
61 | >>> y = np.asarray([0, 1, 2, 3, 4, 5])
62 | >>> for batch in tl.iterate.seq_minibatches(inputs=X, targets=y, batch_size=2, seq_length=2, stride=1):
63 | >>> print(batch)
64 | ... (array([['a', 'a'],
65 | ... ['b', 'b'],
66 | ... ['b', 'b'],
67 | ... ['c', 'c']],
68 | ... dtype='<U1'), array([0, 1, 1, 2]))
69 | ... (array([['c', 'c'],
70 | ... ['d', 'd'],
71 | ... ['d', 'd'],
72 | ... ['e', 'e']],
73 | ... dtype='<U1'), array([2, 3, 3, 4]))
74 | ...
75 |
76 | >>> Many to One
77 | >>> return_last = True
78 | >>> num_steps = 2
79 | >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']])
80 | >>> Y = np.asarray([0,1,2,3,4,5])
81 | >>> for batch in tl.iterate.seq_minibatches(inputs=X, targets=Y, batch_size=2, seq_length=num_steps, stride=1):
82 | >>> x, y = batch
83 | >>> if return_last:
84 | >>> tmp_y = y.reshape((-1, num_steps) + y.shape[1:])
85 | >>> y = tmp_y[:, -1]
86 | >>> print(x, y)
87 | ... [['a' 'a']
88 | ... ['b' 'b']
89 | ... ['b' 'b']
90 | ... ['c' 'c']] [1 2]
91 | ... [['c' 'c']
92 | ... ['d' 'd']
93 | ... ['d' 'd']
94 | ... ['e' 'e']] [3 4]
95 | """
96 | assert len(inputs) == len(targets)
97 | n_loads = (batch_size * stride) + (seq_length - stride)
98 | for start_idx in range(0, len(inputs) - n_loads + 1, (batch_size * stride)):
99 | seq_inputs = np.zeros((batch_size, seq_length) + inputs.shape[1:],
100 | dtype=inputs.dtype)
101 | seq_targets = np.zeros((batch_size, seq_length) + targets.shape[1:],
102 | dtype=targets.dtype)
103 | for b_idx in xrange(batch_size):
104 | start_seq_idx = start_idx + (b_idx * stride)
105 | end_seq_idx = start_seq_idx + seq_length
106 | seq_inputs[b_idx] = inputs[start_seq_idx:end_seq_idx]
107 | seq_targets[b_idx] = targets[start_seq_idx:end_seq_idx]
108 | flatten_inputs = seq_inputs.reshape((-1,) + inputs.shape[1:])
109 | flatten_targets = seq_targets.reshape((-1,) + targets.shape[1:])
110 | yield flatten_inputs, flatten_targets
111 |
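# [Editor's note] A standalone arithmetic sketch, not library code, of the window
# layout used by seq_minibatches above: each yield loads
# n_loads = batch_size*stride + (seq_length - stride) consecutive rows, and the
# window start advances by batch_size*stride, so neighbouring sequences overlap
# whenever stride < seq_length.
batch_size, seq_length, stride, n_rows = 2, 2, 1, 6
n_loads = (batch_size * stride) + (seq_length - stride)            # 3 rows per yield
starts = list(range(0, n_rows - n_loads + 1, batch_size * stride)) # [0, 2] -> 2 batches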
112 | def seq_minibatches2(inputs, targets, batch_size, num_steps):
113 | """
114 | Generate a generator that iterates on two lists of words. Yields (returns) the source contexts and
115 | the target contexts by the given batch_size and num_steps (sequence_length),
116 | see ``PTB tutorial``.
117 |
118 | Hint: if the input data are images, you can modify the code from
119 |
120 | data = np.zeros([batch_size, batch_len])
121 |
122 | to
123 |
124 | data = np.zeros([batch_size, batch_len, inputs.shape[1], inputs.shape[2], inputs.shape[3]])
125 |
126 |
127 | In TensorFlow's tutorial, this generates batch_size pointers into the raw
128 | PTB data, and allows minibatch iteration along these pointers.
129 |
130 | Parameters
131 | ----------
132 | inputs : a list
133 | the context in list format; note that a context is usually
134 | represented by splitting on spaces and then converting the
135 | tokens to unique word IDs.
136 | targets : a list
137 | the context in list format; note that a context is usually
138 | represented by splitting on spaces and then converting the
139 | tokens to unique word IDs.
140 | batch_size : int
141 | the batch size.
142 | num_steps : int
143 | the number of unrolls, i.e. the sequence_length.
144 |
145 | Yields
146 | ------
147 | Pairs of the batched data, each a matrix of shape [batch_size, num_steps].
148 |
149 | Raises
150 | ------
151 | ValueError : if batch_size or num_steps are too high.
152 |
153 | Examples
154 | --------
155 | >>> X = [i for i in range(20)]
156 | >>> Y = [i for i in range(20,40)]
157 | >>> for batch in tl.iterate.seq_minibatches2(X, Y, batch_size=2, num_steps=3):
158 | ... x, y = batch
159 | ... print(x, y)
160 | ...
161 | ... [[ 0. 1. 2.]
162 | ... [ 10. 11. 12.]]
163 | ... [[ 20. 21. 22.]
164 | ... [ 30. 31. 32.]]
165 | ...
166 | ... [[ 3. 4. 5.]
167 | ... [ 13. 14. 15.]]
168 | ... [[ 23. 24. 25.]
169 | ... [ 33. 34. 35.]]
170 | ...
171 | ... [[ 6. 7. 8.]
172 | ... [ 16. 17. 18.]]
173 | ... [[ 26. 27. 28.]
174 | ... [ 36. 37. 38.]]
175 |
176 | Code References
177 | ---------------
178 | - ``tensorflow/models/rnn/ptb/reader.py``
179 | """
180 | assert len(inputs) == len(targets)
181 | data_len = len(inputs)
182 | batch_len = data_len // batch_size
183 | inputs, targets = np.asarray(inputs), np.asarray(targets) # accept plain lists, as in the example above
184 | data = np.zeros((batch_size, batch_len) + inputs.shape[1:],
185 | dtype=inputs.dtype)
186 | data2 = np.zeros([batch_size, batch_len])
187 |
188 | for i in range(batch_size):
189 | data[i] = inputs[batch_len * i:batch_len * (i + 1)]
190 | data2[i] = targets[batch_len * i:batch_len * (i + 1)]
191 |
192 | epoch_size = (batch_len - 1) // num_steps
193 |
194 | if epoch_size == 0:
195 | raise ValueError("epoch_size == 0, decrease batch_size or num_steps")
196 |
197 | for i in range(epoch_size):
198 | x = data[:, i*num_steps:(i+1)*num_steps]
199 | x2 = data2[:, i*num_steps:(i+1)*num_steps]
200 | yield (x, x2)
201 |
202 |
203 | def ptb_iterator(raw_data, batch_size, num_steps):
204 | """
205 | Generate a generator that iterates on a list of words, see PTB tutorial. Yields (returns) the source contexts and
206 | the target contexts by the given batch_size and num_steps (sequence_length).\n
207 | see ``PTB tutorial``.
208 |
209 | e.g. x = [0, 1, 2] y = [1, 2, 3] , when batch_size = 1, num_steps = 3,
210 | raw_data = [i for i in range(100)]
211 |
212 | In TensorFlow's tutorial, this generates batch_size pointers into the raw
213 | PTB data, and allows minibatch iteration along these pointers.
214 |
215 | Parameters
216 | ----------
217 | raw_data : a list
218 | the context in list format; note that a context is usually
219 | represented by splitting on spaces and then converting the
220 | tokens to unique word IDs.
221 | batch_size : int
222 | the batch size.
223 | num_steps : int
224 | the number of unrolls, i.e. the sequence_length.
225 |
226 | Yields
227 | ------
228 | Pairs of the batched data, each a matrix of shape [batch_size, num_steps].
229 | The second element of the tuple is the same data time-shifted to the
230 | right by one.
231 |
232 | Raises
233 | ------
234 | ValueError : if batch_size or num_steps are too high.
235 |
236 | Examples
237 | --------
238 | >>> train_data = [i for i in range(20)]
239 | >>> for batch in tl.iterate.ptb_iterator(train_data, batch_size=2, num_steps=3):
240 | >>> x, y = batch
241 | >>> print(x, y)
242 | ... [[ 0 1 2] <---x 1st subset/ iteration
243 | ... [10 11 12]]
244 | ... [[ 1 2 3] <---y
245 | ... [11 12 13]]
246 | ...
247 | ... [[ 3 4 5] <--- 1st batch input 2nd subset/ iteration
248 | ... [13 14 15]] <--- 2nd batch input
249 | ... [[ 4 5 6] <--- 1st batch target
250 | ... [14 15 16]] <--- 2nd batch target
251 | ...
252 | ... 
[[ 6 7 8] 3rd subset/ iteration 253 | ... [16 17 18]] 254 | ... [[ 7 8 9] 255 | ... [17 18 19]] 256 | 257 | Code References 258 | ---------------- 259 | - ``tensorflow/models/rnn/ptb/reader.py`` 260 | """ 261 | raw_data = np.array(raw_data, dtype=np.int32) 262 | 263 | data_len = len(raw_data) 264 | batch_len = data_len // batch_size 265 | data = np.zeros([batch_size, batch_len], dtype=np.int32) 266 | for i in range(batch_size): 267 | data[i] = raw_data[batch_len * i:batch_len * (i + 1)] 268 | 269 | epoch_size = (batch_len - 1) // num_steps 270 | 271 | if epoch_size == 0: 272 | raise ValueError("epoch_size == 0, decrease batch_size or num_steps") 273 | 274 | for i in range(epoch_size): 275 | x = data[:, i*num_steps:(i+1)*num_steps] 276 | y = data[:, i*num_steps+1:(i+1)*num_steps+1] 277 | yield (x, y) 278 | 279 | 280 | 281 | # def minibatches_for_sequence2D(inputs, targets, batch_size, sequence_length, stride=1): 282 | # """ 283 | # Input a group of example in 2D numpy.array and their labels. 284 | # Return the examples and labels by the given batchsize, sequence_length. 285 | # Use for RNN. 286 | # 287 | # Parameters 288 | # ---------- 289 | # inputs : numpy.array 290 | # (X) The input features, every row is a example. 291 | # targets : numpy.array 292 | # (y) The labels of inputs, every row is a example. 293 | # batchsize : int 294 | # The batch size must be a multiple of sequence_length: int(batch_size % sequence_length) == 0 295 | # sequence_length : int 296 | # The sequence length 297 | # stride : int 298 | # The stride step 299 | # 300 | # Examples 301 | # -------- 302 | # >>> sequence_length = 2 303 | # >>> batch_size = 4 304 | # >>> stride = 1 305 | # >>> X_train = np.asarray([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15],[16,17,18],[19,20,21],[22,23,24]]) 306 | # >>> y_train = np.asarray(['0','1','2','3','4','5','6','7']) 307 | # >>> print('X_train = %s' % X_train) 308 | # >>> print('y_train = %s' % y_train) 309 | # >>> for batch in minibatches_for_sequence2D(X_train, y_train, batch_size=batch_size, sequence_length=sequence_length, stride=stride): 310 | # >>> inputs, targets = batch 311 | # >>> print(inputs) 312 | # >>> print(targets) 313 | # ... [[ 1. 2. 3.] 314 | # ... [ 4. 5. 6.] 315 | # ... [ 4. 5. 6.] 316 | # ... [ 7. 8. 9.]] 317 | # ... [1 2] 318 | # ... [[ 4. 5. 6.] 319 | # ... [ 7. 8. 9.] 320 | # ... [ 7. 8. 9.] 321 | # ... [ 10. 11. 12.]] 322 | # ... [2 3] 323 | # ... ... 324 | # ... [[ 16. 17. 18.] 325 | # ... [ 19. 20. 21.] 326 | # ... [ 19. 20. 21.] 327 | # ... [ 22. 23. 24.]] 328 | # ... 
[6 7] 329 | # """ 330 | # print('len(targets)=%d batch_size=%d sequence_length=%d stride=%d' % (len(targets), batch_size, sequence_length, stride)) 331 | # assert len(inputs) == len(targets), '1 feature vector have 1 target vector/value' #* sequence_length 332 | # # assert int(batch_size % sequence_length) == 0, 'batch_size % sequence_length must == 0\ 333 | # # batch_size is number of examples rather than number of targets' 334 | # 335 | # # print(inputs.shape, len(inputs), len(inputs[0])) 336 | # 337 | # n_targets = int(batch_size/sequence_length) 338 | # # n_targets = int(np.ceil(batch_size/sequence_length)) 339 | # X = np.empty(shape=(0,len(inputs[0])), dtype=np.float32) 340 | # y = np.zeros(shape=(1, n_targets), dtype=np.int32) 341 | # 342 | # for idx in range(sequence_length, len(inputs), stride): # go through all example during 1 epoch 343 | # for n in range(n_targets): # for num of target 344 | # X = np.concatenate((X, inputs[idx-sequence_length+n:idx+n])) 345 | # y[0][n] = targets[idx-1+n] 346 | # # y = np.vstack((y, targets[idx-1+n])) 347 | # yield X, y[0] 348 | # X = np.empty(shape=(0,len(inputs[0]))) 349 | # # y = np.empty(shape=(1,0)) 350 | # 351 | # 352 | # def minibatches_for_sequence4D(inputs, targets, batch_size, sequence_length, stride=1): # 353 | # """ 354 | # Input a group of example in 4D numpy.array and their labels. 355 | # Return the examples and labels by the given batchsize, sequence_length. 356 | # Use for RNN. 357 | # 358 | # Parameters 359 | # ---------- 360 | # inputs : numpy.array 361 | # (X) The input features, every row is a example. 362 | # targets : numpy.array 363 | # (y) The labels of inputs, every row is a example. 364 | # batchsize : int 365 | # The batch size must be a multiple of sequence_length: int(batch_size % sequence_length) == 0 366 | # sequence_length : int 367 | # The sequence length 368 | # stride : int 369 | # The stride step 370 | # 371 | # Examples 372 | # -------- 373 | # >>> sequence_length = 2 374 | # >>> batch_size = 2 375 | # >>> stride = 1 376 | # >>> X_train = np.asarray([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15],[16,17,18],[19,20,21],[22,23,24]]) 377 | # >>> y_train = np.asarray(['0','1','2','3','4','5','6','7']) 378 | # >>> X_train = np.expand_dims(X_train, axis=1) 379 | # >>> X_train = np.expand_dims(X_train, axis=3) 380 | # >>> for batch in minibatches_for_sequence4D(X_train, y_train, batch_size=batch_size, sequence_length=sequence_length, stride=stride): 381 | # >>> inputs, targets = batch 382 | # >>> print(inputs) 383 | # >>> print(targets) 384 | # ... [[[[ 1.] 385 | # ... [ 2.] 386 | # ... [ 3.]]] 387 | # ... [[[ 4.] 388 | # ... [ 5.] 389 | # ... [ 6.]]]] 390 | # ... [1] 391 | # ... [[[[ 4.] 392 | # ... [ 5.] 393 | # ... [ 6.]]] 394 | # ... [[[ 7.] 395 | # ... [ 8.] 396 | # ... [ 9.]]]] 397 | # ... [2] 398 | # ... ... 399 | # ... [[[[ 19.] 400 | # ... [ 20.] 401 | # ... [ 21.]]] 402 | # ... [[[ 22.] 403 | # ... [ 23.] 404 | # ... [ 24.]]]] 405 | # ... 
[7] 406 | # """ 407 | # print('len(targets)=%d batch_size=%d sequence_length=%d stride=%d' % (len(targets), batch_size, sequence_length, stride)) 408 | # assert len(inputs) == len(targets), '1 feature vector have 1 target vector/value' #* sequence_length 409 | # # assert int(batch_size % sequence_length) == 0, 'in LSTM, batch_size % sequence_length must == 0\ 410 | # # batch_size is number of X_train rather than number of targets' 411 | # assert stride >= 1, 'stride must be >=1, at least move 1 step for each iternation' 412 | # 413 | # n_example, n_channels, width, height = inputs.shape 414 | # print('n_example=%d n_channels=%d width=%d height=%d' % (n_example, n_channels, width, height)) 415 | # 416 | # n_targets = int(np.ceil(batch_size/sequence_length)) # 实际为 batchsize/sequence_length + 1 417 | # print(n_targets) 418 | # X = np.zeros(shape=(batch_size, n_channels, width, height), dtype=np.float32) 419 | # # X = np.zeros(shape=(n_targets, sequence_length, n_channels, width, height), dtype=np.float32) 420 | # y = np.zeros(shape=(1,n_targets), dtype=np.int32) 421 | # # y = np.empty(shape=(0,1), dtype=np.float32) 422 | # # time.sleep(2) 423 | # for idx in range(sequence_length, n_example-n_targets+2, stride): # go through all example during 1 epoch 424 | # for n in range(n_targets): # for num of target 425 | # # print(idx+n, inputs[idx-sequence_length+n : idx+n].shape) 426 | # X[n*sequence_length : (n+1)*sequence_length] = inputs[idx+n-sequence_length : idx+n] 427 | # # X[n] = inputs[idx-sequence_length+n:idx+n] 428 | # y[0][n] = targets[idx+n-1] 429 | # # y = np.vstack((y, targets[idx-1+n])) 430 | # # y = targets[idx: idx+n_targets] 431 | # yield X, y[0] 432 | -------------------------------------------------------------------------------- /tensorlayer1.2.2/ops.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- coding: utf8 -*- 3 | 4 | 5 | 6 | 7 | import tensorflow as tf 8 | import os 9 | import sys 10 | from sys import platform as _platform 11 | from .layers import set_keep 12 | 13 | 14 | def exit_tf(sess=None): 15 | """Close tensorboard and nvidia-process if available 16 | 17 | Parameters 18 | ---------- 19 | sess : a session instance of TensorFlow 20 | TensorFlow session 21 | """ 22 | text = "Close tensorboard and nvidia-process if available" 23 | sess.close() 24 | # import time 25 | # time.sleep(2) 26 | if _platform == "linux" or _platform == "linux2": 27 | print('linux: %s' % text) 28 | os.system('nvidia-smi') 29 | os.system('fuser 6006/tcp -k') # kill tensorboard 6006 30 | os.system("nvidia-smi | grep python |awk '{print $3}'|xargs kill") # kill all nvidia-smi python process 31 | elif _platform == "darwin": 32 | print('OS X: %s' % text) 33 | os.system("lsof -i tcp:6006 | grep -v PID | awk '{print $2}' | xargs kill") # kill tensorboard 6006 34 | elif _platform == "win32": 35 | print('Windows: %s' % text) 36 | else: 37 | print(_platform) 38 | exit() 39 | 40 | def clear_all(printable=True): 41 | """Clears all the placeholder variables of keep prob, 42 | including keeping probabilities of all dropout, denoising, dropconnect etc. 43 | 44 | Parameters 45 | ---------- 46 | printable : boolean 47 | If True, print all deleted variables. 
48 | """
49 | print('clear all .....................................')
50 | gl = globals().copy()
51 | for var in gl:
52 | if var[0] == '_': continue
53 | if 'func' in str(globals()[var]): continue
54 | if 'module' in str(globals()[var]): continue
55 | if 'class' in str(globals()[var]): continue
56 |
57 | if printable:
58 | print(" clear_all ------- %s" % str(globals()[var]))
59 |
60 | del globals()[var]
61 |
62 | # def clear_all2(vars, printable=True):
63 | # """
64 | # The :function:`clear_all()` Clears all the placeholder variables of keep prob,
65 | # including keeping probabilities of all dropout, denoising, dropconnect
66 | # Parameters
67 | # ----------
68 | # printable : if True, print all deleted variables.
69 | # """
70 | # print('clear all .....................................')
71 | # for var in vars:
72 | # if var[0] == '_': continue
73 | # if 'func' in str(var): continue
74 | # if 'module' in str(var): continue
75 | # if 'class' in str(var): continue
76 | #
77 | # if printable:
78 | # print(" clear_all ------- %s" % str(var))
79 | #
80 | # del var
81 |
82 | def set_gpu_fraction(sess=None, gpu_fraction=0.3):
83 | """Set the GPU memory fraction for the application.
84 |
85 | Parameters
86 | ----------
87 | sess : a session instance of TensorFlow
88 | TensorFlow session
89 | gpu_fraction : a float
90 | Fraction of GPU memory, (0 ~ 1]
91 |
92 | References
93 | ----------
94 | - `TensorFlow using GPU `_
95 | """
96 | print(" tensorlayer: GPU MEM Fraction %f" % gpu_fraction)
97 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
98 | sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options))
99 | return sess
100 |
101 |
102 |
103 |
104 |
105 | def disable_print():
106 | """Disable console output.
107 |
108 | Examples
109 | ---------
110 | >>> print("You can see me")
111 | >>> tl.ops.disable_print()
112 | >>> print(" You can't see me")
113 | >>> tl.ops.enable_print()
114 | >>> print("You can see me")
115 | """
116 | # sys.stdout = os.devnull # assigning the path string breaks writes, use a real file handle
117 | sys.stdout = open(os.devnull, 'w')
118 | sys.stderr = open(os.devnull, 'w')
119 |
120 | def enable_print():
121 | """Enable console output.
122 |
123 | Examples
124 | --------
125 | - see tl.ops.disable_print()
126 | """
127 | sys.stdout = sys.__stdout__
128 | sys.stderr = sys.__stderr__
129 |
130 |
131 | class temporary_disable_print:
132 | """Temporarily disable console output.
133 |
134 | Examples
135 | ---------
136 | >>> print("You can see me")
137 | >>> with tl.ops.temporary_disable_print() as t:
138 | >>> print("You can't see me")
139 | >>> print("You can see me")
140 | """
141 | def __init__(self):
142 | pass
143 | def __enter__(self):
144 | sys.stdout = open(os.devnull, 'w')
145 | sys.stderr = open(os.devnull, 'w')
146 | def __exit__(self, type, value, traceback):
147 | sys.stdout = sys.__stdout__
148 | sys.stderr = sys.__stderr__
149 | return isinstance(value, TypeError)
150 |
151 |
152 |
153 |
154 |
155 | def get_site_packages_directory():
156 | """Print and return the site-packages directory.
157 | 158 | Examples 159 | --------- 160 | >>> loc = tl.ops.get_site_packages_directory() 161 | """ 162 | import site 163 | try: 164 | loc = site.getsitepackages() 165 | print(" tl.ops : site-packages in ", loc) 166 | return loc 167 | except: 168 | print(" tl.ops : Cannot find package dir from virtual environment") 169 | return False 170 | 171 | 172 | 173 | 174 | # 175 | -------------------------------------------------------------------------------- /tensorlayer1.2.2/prepro.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- coding: utf8 -*- 3 | 4 | 5 | import tensorflow as tf 6 | import tensorlayer as tl 7 | import numpy as np 8 | import time 9 | import numbers 10 | 11 | 12 | def distorted_images(images=None, height=24, width=24): 13 | """Distort images for generating more training data. 14 | 15 | Features 16 | --------- 17 | They are cropped to height * width pixels randomly. 18 | 19 | They are approximately whitened to make the model insensitive to dynamic range. 20 | 21 | Randomly flip the image from left to right. 22 | 23 | Randomly distort the image brightness. 24 | 25 | Randomly distort the image contrast. 26 | 27 | Whiten (Normalize) the images. 28 | 29 | Parameters 30 | ---------- 31 | images : 4D Tensor 32 | The tensor or placeholder of images 33 | height : int 34 | The height for random crop. 35 | width : int 36 | The width for random crop. 37 | 38 | Returns 39 | ------- 40 | result : tuple of Tensor 41 | (Tensor for distorted images, Tensor for while loop index) 42 | 43 | Examples 44 | -------- 45 | >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False) 46 | >>> sess = tf.InteractiveSession() 47 | >>> batch_size = 128 48 | >>> x = tf.placeholder(tf.float32, shape=[batch_size, 32, 32, 3]) 49 | >>> distorted_images_op = tl.preprocess.distorted_images(images=x, height=24, width=24) 50 | >>> sess.run(tf.initialize_all_variables()) 51 | >>> feed_dict={x: X_train[0:batch_size,:,:,:]} 52 | >>> distorted_images, idx = sess.run(distorted_images_op, feed_dict=feed_dict) 53 | >>> tl.visualize.images2d(X_train[0:9,:,:,:], second=2, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212) 54 | >>> tl.visualize.images2d(distorted_images[1:10,:,:,:], second=10, saveable=False, name='distorted_images', dtype=None, fig_idx=23012) 55 | 56 | Notes 57 | ------ 58 | - The first image in 'distorted_images' should be removed. 59 | 60 | References 61 | ----------- 62 | - `tensorflow.models.image.cifar10.cifar10_input `_ 63 | """ 64 | print(" [Warning] distorted_images will be deprecated due to speed, see TFRecord tutorial for more info...") 65 | try: 66 | batch_size = int(images._shape[0]) 67 | except: 68 | raise Exception('unknow batch_size of images') 69 | distorted_x = tf.Variable(tf.constant(0.1, shape=[1, height, width, 3])) 70 | i = tf.Variable(tf.constant(0)) 71 | 72 | c = lambda distorted_x, i: tf.less(i, batch_size) 73 | 74 | def body(distorted_x, i): 75 | # 1. Randomly crop a [height, width] section of the image. 76 | image = tf.random_crop(tf.gather(images, i), [height, width, 3]) 77 | # 2. Randomly flip the image horizontally. 78 | image = tf.image.random_flip_left_right(image) 79 | # 3. Randomly change brightness. 80 | image = tf.image.random_brightness(image, max_delta=63) 81 | # 4. Randomly change contrast. 82 | image = tf.image.random_contrast(image, lower=0.2, upper=1.8) 83 | # 5. Subtract off the mean and divide by the variance of the pixels. 
84 | image = tf.image.per_image_whitening(image)
85 | # 6. Append the image to a batch.
86 | image = tf.expand_dims(image, 0)
87 | return tf.concat(0, [distorted_x, image]), tf.add(i, 1)
88 |
89 | result = tf.while_loop(cond=c, body=body, loop_vars=(distorted_x, i), parallel_iterations=16)
90 | return result
91 |
92 |
93 | def crop_central_whiten_images(images=None, height=24, width=24):
94 | """Crop the central part of the images, and normalize them, for test data.
95 |
96 | They are cropped to the central height * width pixels.
97 |
98 | Whiten (Normalize) the images.
99 |
100 | Parameters
101 | ----------
102 | images : 4D Tensor
103 | The tensor or placeholder of images
104 | height : int
105 | The height for central crop.
106 | width: int
107 | The width for central crop.
108 |
109 | Returns
110 | -------
111 | result : tuple Tensor
112 | (Tensor for central images, Tensor for while loop index)
113 |
114 | Examples
115 | --------
116 | >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)
117 | >>> sess = tf.InteractiveSession()
118 | >>> batch_size = 128
119 | >>> x = tf.placeholder(tf.float32, shape=[batch_size, 32, 32, 3])
120 | >>> central_images_op = tl.preprocess.crop_central_whiten_images(images=x, height=24, width=24)
121 | >>> sess.run(tf.initialize_all_variables())
122 | >>> feed_dict={x: X_train[0:batch_size,:,:,:]}
123 | >>> central_images, idx = sess.run(central_images_op, feed_dict=feed_dict)
124 | >>> tl.visualize.images2d(X_train[0:9,:,:,:], second=2, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212)
125 | >>> tl.visualize.images2d(central_images[1:10,:,:,:], second=10, saveable=False, name='central_images', dtype=None, fig_idx=23012)
126 |
127 | Notes
128 | ------
129 | The first image in 'central_images' should be removed.
130 |
131 | Code References
132 | ----------------
133 | - ``tensorflow.models.image.cifar10.cifar10_input``
134 | """
135 | print(" [Warning] crop_central_whiten_images will be deprecated due to speed, see TFRecord tutorial for more info...")
136 | try:
137 | batch_size = int(images._shape[0])
138 | except:
139 | raise Exception('unknown batch_size of images')
140 | central_x = tf.Variable(tf.constant(0.1, shape=[1, height, width, 3]))
141 | i = tf.Variable(tf.constant(0))
142 |
143 | c = lambda central_x, i: tf.less(i, batch_size)
144 |
145 | def body(central_x, i):
146 | # 1. Crop the central [height, width] of the image.
147 | image = tf.image.resize_image_with_crop_or_pad(tf.gather(images, i), height, width)
148 | # 2. Subtract off the mean and divide by the variance of the pixels.
149 | image = tf.image.per_image_whitening(image)
150 | # 3. Append the image to a batch.
151 | image = tf.expand_dims(image, 0)
152 | return tf.concat(0, [central_x, image]), tf.add(i, 1)
153 |
154 | result = tf.while_loop(cond=c, body=body, loop_vars=(central_x, i), parallel_iterations=16)
155 | return result
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 | #
169 |
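# [Editor's note] A plain-NumPy sketch, not library code, of the whitening step
# tf.image.per_image_whitening performs in the two functions above: subtract the
# per-image mean and divide by an adjusted standard deviation so near-constant
# images do not blow up.
import numpy as np
img = np.random.rand(24, 24, 3).astype(np.float32)
adjusted_std = max(img.std(), 1.0 / np.sqrt(img.size))
white = (img - img.mean()) / adjusted_std  # ~zero mean, ~unit variance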
-------------------------------------------------------------------------------- /tensorlayer1.2.2/rein.py: --------------------------------------------------------------------------------
1 | #! /usr/bin/python
2 | # -*- coding: utf8 -*-
3 |
4 |
5 |
6 | import tensorflow as tf
7 | import numpy as np
8 | from six.moves import xrange
9 |
10 | def discount_episode_rewards(rewards=[], gamma=0.99):
11 | """ Take a 1D float array of rewards and compute the discounted rewards for an
12 | episode. Whenever a non-zero reward is encountered (scanning from the end), it is treated as the end of an episode.
13 |
14 | Parameters
15 | ----------
16 | rewards : numpy list
17 | a list of rewards
18 | gamma : float
19 | discount factor
20 |
21 | Examples
22 | ----------
23 | >>> rewards = np.asarray([0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1])
24 | >>> gamma = 0.9
25 | >>> discount_rewards = tl.rein.discount_episode_rewards(rewards, gamma)
26 | >>> print(discount_rewards)
27 | ... [ 0.72899997 0.81 0.89999998 1. 0.72899997 0.81
28 | ... 0.89999998 1. 0.72899997 0.81 0.89999998 1. ]
29 | """
30 | discounted_r = np.zeros_like(rewards, dtype=np.float32)
31 | running_add = 0
32 | for t in reversed(xrange(0, rewards.size)):
33 | if rewards[t] != 0: running_add = 0 # reset the running sum at an episode boundary
34 |
35 | running_add = running_add * gamma + rewards[t]
36 | discounted_r[t] = running_add
37 | return discounted_r
38 |
39 |
40 | def cross_entropy_reward_loss(logits, actions, rewards):
41 | """ Calculate the loss for Policy Gradient Network.
42 |
43 | Parameters
44 | ----------
45 | logits : tensor
46 | The network outputs without softmax. This function implements softmax
47 | inside.
48 | actions : tensor/ placeholder
49 | The agent actions.
50 | rewards : tensor/ placeholder
51 | The rewards.
52 |
53 | Examples
54 | ----------
55 | >>> states_batch_pl = tf.placeholder(tf.float32, shape=[None, D]) # observation for training
56 | >>> network = tl.layers.InputLayer(states_batch_pl, name='input_layer')
57 | >>> network = tl.layers.DenseLayer(network, n_units=H, act = tf.nn.relu, name='relu1')
58 | >>> network = tl.layers.DenseLayer(network, n_units=3, act = tl.activation.identity, name='output_layer')
59 | >>> probs = network.outputs
60 | >>> sampling_prob = tf.nn.softmax(probs)
61 | >>> actions_batch_pl = tf.placeholder(tf.int32, shape=[None])
62 | >>> discount_rewards_batch_pl = tf.placeholder(tf.float32, shape=[None])
63 | >>> loss = cross_entropy_reward_loss(probs, actions_batch_pl, discount_rewards_batch_pl)
64 | >>> train_op = tf.train.RMSPropOptimizer(learning_rate, decay_rate).minimize(loss)
65 | """
66 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, actions)
67 | loss = tf.reduce_sum(tf.mul(cross_entropy, rewards)) # element-wise mul
68 | return loss
69 |
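# [Editor's note] A standalone pure-Python check, not library code, of the
# reverse-scan recursion in discount_episode_rewards above, on one 3-step episode.
rewards = [0.0, 0.0, 1.0]
gamma = 0.9
out, running = [0.0, 0.0, 0.0], 0.0
for t in reversed(range(3)):
    if rewards[t] != 0: running = 0.0  # a non-zero reward marks an episode boundary
    running = running * gamma + rewards[t]
    out[t] = running
print(out)  # ~[0.81, 0.9, 1.0], i.e. gamma**2, gamma**1, gamma**0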
-------------------------------------------------------------------------------- /tensorlayer1.2.2/utils.py: --------------------------------------------------------------------------------
1 | #! /usr/bin/python
2 | # -*- coding: utf8 -*-
3 | import tensorflow as tf
4 | from . import iterate
5 | import numpy as np
6 | import time
7 |
8 |
9 | def fit(sess, network, train_op, cost, X_train, y_train, x, y_, acc=None, batch_size=100, n_epoch=100, print_freq=5, X_val=None, y_val=None, eval_train=True):
10 | """
11 | Train a given non-time-series network with the given cost function, training data, batch_size, n_epoch, etc.
12 |
13 | Parameters
14 | ----------
15 | sess : TensorFlow session
16 | sess = tf.InteractiveSession()
17 | network : a TensorLayer layer
18 | the network to be trained
19 | train_op : a TensorFlow optimizer
20 | like tf.train.AdamOptimizer
21 | X_train : numpy array
22 | the input of training data
23 | y_train : numpy array
24 | the target of training data
25 | x : placeholder
26 | for inputs
27 | y_ : placeholder
28 | for targets
29 | acc : the TensorFlow expression of accuracy (or other metric) or None
30 | if None, would not display the metric
31 | batch_size : int
32 | batch size for training and evaluating
33 | n_epoch : int
34 | the number of training epochs
35 | print_freq : int
36 | display the training information every ``print_freq`` epochs
37 | X_val : numpy array or None
38 | the input of validation data
39 | y_val : numpy array or None
40 | the target of validation data
41 | eval_train : boolean
42 | if X_val and y_val are not None, it reflects whether to evaluate the training data
43 |
44 | Examples
45 | --------
46 | >>> see tutorial_mnist_simple.py
47 | >>> tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_,
48 | ... acc=acc, batch_size=500, n_epoch=200, print_freq=5,
49 | ... X_val=X_val, y_val=y_val, eval_train=False)
50 | """
51 | assert X_train.shape[0] >= batch_size, "Number of training examples should be bigger than the batch size"
52 | print("Start training the network ...")
53 | start_time_begin = time.time()
54 | for epoch in range(n_epoch):
55 | start_time = time.time()
56 | loss_ep = 0; n_step = 0
57 | for X_train_a, y_train_a in iterate.minibatches(X_train, y_train,
58 | batch_size, shuffle=True):
59 | feed_dict = {x: X_train_a, y_: y_train_a}
60 | feed_dict.update( network.all_drop ) # enable noise layers
61 | loss, _ = sess.run([cost, train_op], feed_dict=feed_dict)
62 | loss_ep += loss
63 | n_step += 1
64 | loss_ep = loss_ep/ n_step
65 |
66 | if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
67 | if (X_val is not None) and (y_val is not None):
68 | print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
69 | if eval_train is True:
70 | train_loss, train_acc, n_batch = 0, 0, 0
71 | for X_train_a, y_train_a in iterate.minibatches(
72 | X_train, y_train, batch_size, shuffle=True):
73 | dp_dict = dict_to_one( network.all_drop ) # disable noise layers
74 | feed_dict = {x: X_train_a, y_: y_train_a}
75 | feed_dict.update(dp_dict)
76 | if acc is not None:
77 | err, ac = sess.run([cost, acc], feed_dict=feed_dict)
78 | train_acc += ac
79 | else:
80 | err = sess.run(cost, feed_dict=feed_dict)
81 | train_loss += err; n_batch += 1
82 | print(" train loss: %f" % (train_loss/ n_batch))
83 | if acc is not None:
84 | print(" train acc: %f" % (train_acc/ n_batch))
85 | val_loss, val_acc, n_batch = 0, 0, 0
86 | for X_val_a, y_val_a in iterate.minibatches(
87 | X_val, y_val, batch_size, shuffle=True):
88 | dp_dict = dict_to_one( network.all_drop ) # disable noise layers
89 | feed_dict = {x: X_val_a, y_: y_val_a}
90 | feed_dict.update(dp_dict)
91 | if acc is not None:
92 | err, ac = sess.run([cost, acc], feed_dict=feed_dict)
93 | val_acc += ac
94 | else:
95 | err = sess.run(cost, feed_dict=feed_dict)
96 | val_loss += err; n_batch += 1
97 | print(" val loss: %f" % (val_loss/ n_batch))
98 | if acc is not None:
99 | print(" val acc: %f" % (val_acc/ n_batch))
100 | else:
101 | print("Epoch %d of %d took %fs, loss %f" % (epoch + 1, n_epoch, time.time() - start_time, loss_ep))
102 | print("Total training time: %fs" % (time.time() - start_time_begin))
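# [Editor's note] A hedged illustration (plain dicts, no session required, not
# library code) of the feed_dict pattern fit() uses above: `network.all_drop`
# maps dropout keep-prob placeholders to their training-time values; feeding the
# same dict with every value set to one (see dict_to_one below) disables the
# noise layers for evaluation. String keys here are stand-ins for placeholders.
all_drop = {'keep_prob_1': 0.8, 'keep_prob_2': 0.5}
train_feed = dict(all_drop)                # training: dropout enabled
eval_feed = {k: 1 for k in all_drop}       # evaluation: dropout disabled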
103 |
104 |
105 | def test(sess, network, acc, X_test, y_test, x, y_, batch_size, cost=None):
106 | """
107 | Test a given non-time-series network with the given test data and metric.
108 |
109 | Parameters
110 | ----------
111 | sess : TensorFlow session
112 | sess = tf.InteractiveSession()
113 | network : a TensorLayer layer
114 | the network to be tested
115 | acc : the TensorFlow expression of accuracy (or other metric) or None
116 | if None, would not display the metric
117 | X_test : numpy array
118 | the input of test data
119 | y_test : numpy array
120 | the target of test data
121 | x : placeholder
122 | for inputs
123 | y_ : placeholder
124 | for targets
125 | batch_size : int or None
126 | batch size for testing; when the dataset is large, we should use minibatches for testing;
127 | when the dataset is small, we can set it to None.
128 | cost : the TensorFlow expression of cost or None
129 | if None, would not display the cost
130 |
131 | Examples
132 | --------
133 | >>> see tutorial_mnist_simple.py
134 | >>> tl.utils.test(sess, network, acc, X_test, y_test, x, y_, batch_size=None, cost=cost)
135 | """
136 | print('Start testing the network ...')
137 | if batch_size is None:
138 | dp_dict = dict_to_one( network.all_drop )
139 | feed_dict = {x: X_test, y_: y_test}
140 | feed_dict.update(dp_dict)
141 | if cost is not None:
142 | print(" test loss: %f" % sess.run(cost, feed_dict=feed_dict))
143 | print(" test acc: %f" % sess.run(acc, feed_dict=feed_dict))
144 | # print(" test acc: %f" % np.mean(y_test == sess.run(y_op,
145 | # feed_dict=feed_dict)))
146 | else:
147 | test_loss, test_acc, n_batch = 0, 0, 0
148 | for X_test_a, y_test_a in iterate.minibatches(
149 | X_test, y_test, batch_size, shuffle=True):
150 | dp_dict = dict_to_one( network.all_drop ) # disable noise layers
151 | feed_dict = {x: X_test_a, y_: y_test_a}
152 | feed_dict.update(dp_dict)
153 | if cost is not None:
154 | err, ac = sess.run([cost, acc], feed_dict=feed_dict)
155 | test_loss += err # accumulate into test_loss (was `val_loss`, an undefined name here)
156 | else:
157 | ac = sess.run(acc, feed_dict=feed_dict)
158 | test_acc += ac; n_batch += 1
159 | if cost is not None:
160 | print(" test loss: %f" % (test_loss/ n_batch))
161 | print(" test acc: %f" % (test_acc/ n_batch))
162 |
163 |
164 | def predict(sess, network, X, x, y_op):
165 | """
166 | Return the prediction results of the given non-time-series network.
167 |
168 | Parameters
169 | ----------
170 | sess : TensorFlow session
171 | sess = tf.InteractiveSession()
172 | network : a TensorLayer layer
173 | the network to make predictions with
174 | X : numpy array
175 | the input
176 | y_op : TensorFlow tensor
177 | the argmax expression of the softmax outputs
178 |
179 | Examples
180 | --------
181 | >>> see tutorial_mnist_simple.py
182 | >>> y = network.outputs
183 | >>> y_op = tf.argmax(tf.nn.softmax(y), 1)
184 | >>> print(tl.utils.predict(sess, network, X_test, x, y_op))
185 | """
186 | dp_dict = dict_to_one( network.all_drop ) # disable noise layers
187 | feed_dict = {x: X,}
188 | feed_dict.update(dp_dict)
189 | return sess.run(y_op, feed_dict=feed_dict)
190 |
191 | ## Evaluation
192 | def evaluation(y_test=None, y_predict=None, n_classes=None):
193 | """
194 | Input the predicted results and target results with
195 | the number of classes; return the confusion matrix, the F1-score of each class,
196 | the accuracy and the macro F1-score.
197 | 198 | Parameters 199 | ---------- 200 | y_test : numpy.array or list 201 | target results 202 | y_predict : numpy.array or list 203 | predicted results 204 | n_classes : int 205 | number of classes 206 | 207 | Examples 208 | -------- 209 | >>> c_mat, f1, acc, f1_macro = evaluation(y_test, y_predict, n_classes) 210 | """ 211 | from sklearn.metrics import confusion_matrix, f1_score, accuracy_score 212 | c_mat = confusion_matrix(y_test, y_predict, labels = [x for x in range(n_classes)]) 213 | f1 = f1_score(y_test, y_predict, average = None, labels = [x for x in range(n_classes)]) 214 | f1_macro = f1_score(y_test, y_predict, average='macro') 215 | acc = accuracy_score(y_test, y_predict) 216 | print('confusion matrix: \n',c_mat) 217 | print('f1-score:',f1) 218 | print('f1-score(macro):',f1_macro) # same output with > f1_score(y_true, y_pred, average='macro') 219 | print('accuracy-score:', acc) 220 | return c_mat, f1, acc, f1_macro 221 | 222 | def dict_to_one(dp_dict={}): 223 | """ 224 | Input a dictionary, return a dictionary that all items are set to one, 225 | use for disable dropout, dropconnect layer and so on. 226 | 227 | Parameters 228 | ---------- 229 | dp_dict : dictionary 230 | keeping probabilities 231 | 232 | Examples 233 | -------- 234 | >>> dp_dict = dict_to_one( network.all_drop ) 235 | >>> dp_dict = dict_to_one( network.all_drop ) 236 | >>> feed_dict.update(dp_dict) 237 | """ 238 | return {x: 1 for x in dp_dict} 239 | 240 | def flatten_list(list_of_list=[[],[]]): 241 | """ 242 | Input a list of list, return a list that all items are in a list 243 | 244 | Parameters 245 | ---------- 246 | list_of_list : a list of list 247 | 248 | Examples 249 | -------- 250 | >>> tl.utils.flatten_list([[1, 2, 3],[4, 5],[6]]) 251 | ... [1, 2, 3, 4, 5, 6] 252 | """ 253 | return sum(list_of_list, []) 254 | 255 | 256 | def class_balancing_oversample(X_train=None, y_train=None, printable=True): 257 | """Input the features and labels, return the features and labels after oversampling. 
258 | 259 | Parameters 260 | ---------- 261 | X_train : numpy.array 262 | Features, each row is an example 263 | y_train : numpy.array 264 | Labels 265 | 266 | Examples 267 | -------- 268 | >>> X_train, y_train = class_balancing_oversample(X_train, y_train, printable=True) 269 | """ 270 | # ======== Classes balancing 271 | if printable: 272 | print("Classes balancing for training examples...") 273 | from collections import Counter 274 | c = Counter(y_train) 275 | if printable: 276 | print('the occurrence number of each stage: %s' % c.most_common()) 277 | print('the least stage is Label %s have %s instances' % c.most_common()[-1]) 278 | print('the most stage is Label %s have %s instances' % c.most_common(1)[0]) 279 | most_num = c.most_common(1)[0][1] 280 | if printable: 281 | print('most num is %d, all classes tend to be this num' % most_num) 282 | 283 | locations = {} 284 | number = {} 285 | 286 | for lab, num in c.most_common(): # find the index from y_train 287 | number[lab] = num 288 | locations[lab] = np.where(np.array(y_train)==lab)[0] 289 | if printable: 290 | print('convert list(np.array) to dict format') 291 | X = {} # convert list to dict 292 | for lab, num in number.items(): 293 | X[lab] = X_train[locations[lab]] 294 | 295 | # oversampling 296 | if printable: 297 | print('start oversampling') 298 | for key in X: 299 | temp = X[key] 300 | while True: 301 | if len(X[key]) >= most_num: 302 | break 303 | X[key] = np.vstack((X[key], temp)) 304 | if printable: 305 | print('first features of label 0 >', len(X[0][0])) 306 | print('the occurrence num of each stage after oversampling') 307 | for key in X: 308 | print(key, len(X[key])) 309 | if printable: 310 | print('make each stage have same num of instances') 311 | for key in X: 312 | X[key] = X[key][0:most_num,:] 313 | print(key, len(X[key])) 314 | 315 | # convert dict to list 316 | if printable: 317 | print('convert from dict to list format') 318 | y_train = [] 319 | X_train = np.empty(shape=(0,len(X[0][0]))) 320 | for key in X: 321 | X_train = np.vstack( (X_train, X[key] ) ) 322 | y_train.extend([key for i in range(len(X[key]))]) 323 | # print(len(X_train), len(y_train)) 324 | c = Counter(y_train) 325 | if printable: 326 | print('the occurrence number of each stage after oversampling: %s' % c.most_common()) 327 | # ================ End of Classes balancing 328 | return X_train, y_train 329 | 330 | 331 | 332 | 333 | # 334 | # def class_balancing_sequence_4D(X_train, y_train, sequence_length, model='downsampling' ,printable=True): 335 | # ''' 输入、输出都是sequence format 336 | # oversampling or downsampling 337 | # ''' 338 | # n_features = X_train.shape[2] 339 | # # ======== Classes balancing for sequence 340 | # if printable: 341 | # print("Classes balancing for 4D sequence training examples...") 342 | # from collections import Counter 343 | # c = Counter(y_train) # Counter({2: 454, 4: 267, 3: 124, 1: 57, 0: 48}) 344 | # if printable: 345 | # print('the occurrence number of each stage: %s' % c.most_common()) 346 | # print('the least Label %s have %s instances' % c.most_common()[-1]) 347 | # print('the most Label %s have %s instances' % c.most_common(1)[0]) 348 | # # print(c.most_common()) # [(2, 454), (4, 267), (3, 124), (1, 57), (0, 48)] 349 | # most_num = c.most_common(1)[0][1] 350 | # less_num = c.most_common()[-1][1] 351 | # 352 | # locations = {} 353 | # number = {} 354 | # for lab, num in c.most_common(): 355 | # number[lab] = num 356 | # locations[lab] = np.where(np.array(y_train)==lab)[0] 357 | # # print(locations) 358 | # # 
print(number) 359 | # if printable: 360 | # print(' convert list to dict') 361 | # X = {} # convert list to dict 362 | # ### a sequence 363 | # for lab, _ in number.items(): 364 | # X[lab] = np.empty(shape=(0,1,n_features,1)) # 4D 365 | # for lab, _ in number.items(): 366 | # #X[lab] = X_train[locations[lab] 367 | # for l in locations[lab]: 368 | # X[lab] = np.vstack((X[lab], X_train[l*sequence_length : (l+1)*(sequence_length)])) 369 | # # X[lab] = X_train[locations[lab]*sequence_length : locations[lab]*(sequence_length+1)] # a sequence 370 | # # print(X) 371 | # 372 | # if model=='oversampling': 373 | # if printable: 374 | # print(' oversampling -- most num is %d, all classes tend to be this num\nshuffle applied' % most_num) 375 | # for key in X: 376 | # temp = X[key] 377 | # while True: 378 | # if len(X[key]) >= most_num * sequence_length: # sequence 379 | # break 380 | # X[key] = np.vstack((X[key], temp)) 381 | # # print(key, len(X[key])) 382 | # if printable: 383 | # print(' make each stage have same num of instances') 384 | # for key in X: 385 | # X[key] = X[key][0:most_num*sequence_length,:] # sequence 386 | # if printable: 387 | # print(key, len(X[key])) 388 | # elif model=='downsampling': 389 | # import random 390 | # if printable: 391 | # print(' downsampling -- less num is %d, all classes tend to be this num by randomly choice without replacement\nshuffle applied' % less_num) 392 | # for key in X: 393 | # # print(key, len(X[key]))#, len(X[key])/sequence_length) 394 | # s_idx = [ i for i in range(int(len(X[key])/sequence_length))] 395 | # s_idx = np.asarray(s_idx)*sequence_length # start index of sequnce in X[key] 396 | # # print('s_idx',s_idx) 397 | # r_idx = np.random.choice(s_idx, less_num, replace=False) # random choice less_num of s_idx 398 | # # print('r_idx',r_idx) 399 | # temp = X[key] 400 | # X[key] = np.empty(shape=(0,1,n_features,1)) # 4D 401 | # for idx in r_idx: 402 | # X[key] = np.vstack((X[key], temp[idx:idx+sequence_length])) 403 | # # print(key, X[key]) 404 | # # np.random.choice(l, len(l), replace=False) 405 | # else: 406 | # raise Exception(' model should be oversampling or downsampling') 407 | # 408 | # # convert dict to list 409 | # if printable: 410 | # print(' convert dict to list') 411 | # y_train = [] 412 | # # X_train = np.empty(shape=(0,len(X[0][0]))) 413 | # # X_train = np.empty(shape=(0,len(X[1][0]))) # 2D 414 | # X_train = np.empty(shape=(0,1,n_features,1)) # 4D 415 | # l_key = list(X.keys()) # shuffle 416 | # random.shuffle(l_key) # shuffle 417 | # # for key in X: # no shuffle 418 | # for key in l_key: # shuffle 419 | # X_train = np.vstack( (X_train, X[key] ) ) 420 | # # print(len(X[key])) 421 | # y_train.extend([key for i in range(int(len(X[key])/sequence_length))]) 422 | # # print(X_train,y_train, type(X_train), type(y_train)) 423 | # # ================ End of Classes balancing for sequence 424 | # # print(X_train.shape, len(y_train)) 425 | # return X_train, np.asarray(y_train) 426 | -------------------------------------------------------------------------------- /tensorlayer1.2.2/visualize.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/python
2 | # -*- coding: utf8 -*-
3 |
4 |
5 | import matplotlib
6 | matplotlib.use('Agg')
7 | import matplotlib.pyplot as plt
8 | # import matplotlib.pyplot as plt
9 | import numpy as np
10 | import os
11 |
12 |
13 |
14 | def W(W=None, second=10, saveable=True, shape=[28,28], name='mnist', fig_idx=2396512):
15 | """Visualize every column of the weight matrix as a group of greyscale images.
16 |
17 | Parameters
18 | ----------
19 | W : numpy.array
20 | The weight matrix
21 | second : int
22 | The display second(s) for the image(s), if saveable is False.
23 | saveable : boolean
24 | Save or plot the figure.
25 | shape : a list with 2 int
26 | The shape of the feature image; for MNIST it is [28, 28].
27 | name : a string
28 | A name to save the image, if saveable is True.
29 | fig_idx : int
30 | matplotlib figure index.
31 |
32 | Examples
33 | --------
34 | >>> tl.visualize.W(network.all_params[0].eval(), second=10, saveable=True, name='weight_of_1st_layer', fig_idx=2012)
35 | """
36 | if saveable is False:
37 | plt.ion()
38 | fig = plt.figure(fig_idx) # show all feature images
39 | size = W.shape[0]
40 | n_units = W.shape[1]
41 |
42 | num_r = int(np.sqrt(n_units)) # number of images per row; e.g. 25 hidden units -> 5 per row
43 | num_c = int(np.ceil(n_units/num_r))
44 | count = int(1)
45 | for row in range(1, num_r+1):
46 | for col in range(1, num_c+1):
47 | if count > n_units:
48 | break
49 | a = fig.add_subplot(num_r, num_c, count)
50 | # ------------------------------------------------------------
51 | # plt.imshow(np.reshape(W[:,count-1],(28,28)), cmap='gray')
52 | # ------------------------------------------------------------
53 | feature = W[:,count-1] / np.sqrt( (W[:,count-1]**2).sum())
54 | # feature[feature<0.0001] = 0 # value threshold
55 | # if count == 1 or count == 2:
56 | # print(np.mean(feature))
57 | # if np.std(feature) < 0.03: # condition threshold
58 | # feature = np.zeros_like(feature)
59 | # if np.mean(feature) < -0.015: # condition threshold
60 | # feature = np.zeros_like(feature)
61 | plt.imshow(np.reshape(feature ,(shape[0],shape[1])),
62 | cmap='gray', interpolation="nearest")#, vmin=np.min(feature), vmax=np.max(feature))
63 | # ------------------------------------------------------------
64 | # plt.imshow(np.reshape(W[:,count-1] ,(np.sqrt(size),np.sqrt(size))), cmap='gray', interpolation="nearest")
65 | plt.gca().xaxis.set_major_locator(plt.NullLocator()) # disable ticks
66 | plt.gca().yaxis.set_major_locator(plt.NullLocator())
67 | count = count + 1
68 | if saveable:
69 | plt.savefig(name+'.pdf',format='pdf')
70 | else:
71 | plt.draw()
72 | plt.pause(second)
73 |
74 | def frame(I=None, second=5, saveable=True, name='frame', fig_idx=12836):
75 | """Display a frame (image). Make sure OpenAI Gym's render() is disabled before using it.
76 |
77 | Parameters
78 | ----------
79 | I : numpy.array
80 | The image
81 | second : int
82 | The display second(s) for the image(s), if saveable is False.
83 | saveable : boolean
84 | Save or plot the figure.
85 | name : a string
86 | A name to save the image, if saveable is True.
87 | fig_idx : int
88 | matplotlib figure index.
def frame(I=None, second=5, saveable=True, name='frame', fig_idx=12836):
    """Display a frame (image). Make sure OpenAI Gym render() is disabled before using it.

    Parameters
    ----------
    I : numpy.array
        The image.
    second : int
        The display second(s) for the image(s), if saveable is False.
    saveable : boolean
        Save or plot the figure.
    name : a string
        A name to save the image, if saveable is True.
    fig_idx : int
        matplotlib figure index.

    Examples
    --------
    >>> env = gym.make("Pong-v0")
    >>> observation = env.reset()
    >>> tl.visualize.frame(observation)
    """
    if saveable is False:
        plt.ion()
    fig = plt.figure(fig_idx)  # show all feature images

    plt.imshow(I)
    # plt.gca().xaxis.set_major_locator(plt.NullLocator())  # disable ticks
    # plt.gca().yaxis.set_major_locator(plt.NullLocator())

    if saveable:
        plt.savefig(name+'.pdf', format='pdf')
    else:
        plt.draw()
        plt.pause(second)

def CNN2d(CNN=None, second=10, saveable=True, name='cnn', fig_idx=3119362):
    """Display a group of RGB or greyscale CNN masks.

    Parameters
    ----------
    CNN : numpy.array
        The CNN filters, e.g. 64 5x5 RGB masks have shape (5, 5, 3, 64).
    second : int
        The display second(s) for the image(s), if saveable is False.
    saveable : boolean
        Save or plot the figure.
    name : a string
        A name to save the image, if saveable is True.
    fig_idx : int
        matplotlib figure index.

    Examples
    --------
    >>> tl.visualize.CNN2d(network.all_params[0].eval(), second=10, saveable=True, name='cnn1_mnist', fig_idx=2012)
    """
    # print(CNN.shape)    # (5, 5, 3, 64)
    # exit()
    n_mask = CNN.shape[3]
    n_row = CNN.shape[0]
    n_col = CNN.shape[1]
    n_color = CNN.shape[2]
    row = int(np.sqrt(n_mask))
    col = int(np.ceil(n_mask/row))
    plt.ion()  # active mode
    fig = plt.figure(fig_idx)
    count = 1
    for ir in range(1, row+1):
        for ic in range(1, col+1):
            if count > n_mask:
                break
            a = fig.add_subplot(col, row, count)
            # print(CNN[:,:,:,count-1].shape, n_row, n_col)    # (5, 1, 32) 5 5
            # exit()
            # plt.imshow(
            #     np.reshape(CNN[count-1,:,:,:], (n_row, n_col)),
            #     cmap='gray', interpolation="nearest")    # theano
            if n_color == 1:
                plt.imshow(
                    np.reshape(CNN[:, :, :, count-1], (n_row, n_col)),
                    cmap='gray', interpolation="nearest")
            elif n_color == 3:
                plt.imshow(
                    np.reshape(CNN[:, :, :, count-1], (n_row, n_col, n_color)),
                    cmap='gray', interpolation="nearest")
            else:
                raise Exception("Unknown n_color")
            plt.gca().xaxis.set_major_locator(plt.NullLocator())  # disable ticks
            plt.gca().yaxis.set_major_locator(plt.NullLocator())
            count = count + 1
    if saveable:
        plt.savefig(name+'.pdf', format='pdf')
    else:
        plt.draw()
        plt.pause(second)
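# ---------------------------------------------------------------------------
# Editor's smoke-test sketch for CNN2d() (hedged: the name `_demo_CNN2d` and
# the random (5, 5, 3, 64) filter bank are illustrative assumptions).
def _demo_CNN2d():
    """Tile 64 random 5x5 RGB masks -> 'cnn_demo.pdf'."""
    random_filters = np.random.rand(5, 5, 3, 64)  # (height, width, channels, n_masks), values in [0, 1]
    CNN2d(random_filters, saveable=True, name='cnn_demo')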
def images2d(images=None, second=10, saveable=True, name='images', dtype=None,
             fig_idx=3119362):
    """Display a group of RGB or greyscale images.

    Parameters
    ----------
    images : numpy.array
        The images.
    second : int
        The display second(s) for the image(s), if saveable is False.
    saveable : boolean
        Save or plot the figure.
    name : a string
        A name to save the image, if saveable is True.
    dtype : None or numpy data type
        The data type for displaying the images.
    fig_idx : int
        matplotlib figure index.

    Examples
    --------
    >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)
    >>> tl.visualize.images2d(X_train[0:100,:,:,:], second=10, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212)
    """
    # print(images.shape)    # (50000, 32, 32, 3)
    # exit()
    if dtype:
        images = np.asarray(images, dtype=dtype)
    n_mask = images.shape[0]
    n_row = images.shape[1]
    n_col = images.shape[2]
    n_color = images.shape[3]
    row = int(np.sqrt(n_mask))
    col = int(np.ceil(n_mask/row))
    plt.ion()  # active mode
    fig = plt.figure(fig_idx)
    count = 1
    for ir in range(1, row+1):
        for ic in range(1, col+1):
            if count > n_mask:
                break
            a = fig.add_subplot(col, row, count)
            # print(images[:,:,:,count-1].shape, n_row, n_col)    # (5, 1, 32) 5 5
            # plt.imshow(
            #     np.reshape(images[count-1,:,:,:], (n_row, n_col)),
            #     cmap='gray', interpolation="nearest")    # theano
            if n_color == 1:
                plt.imshow(
                    np.reshape(images[count-1, :, :], (n_row, n_col)),
                    cmap='gray', interpolation="nearest")
            elif n_color == 3:
                plt.imshow(images[count-1, :, :],
                           cmap='gray', interpolation="nearest")
            else:
                raise Exception("Unknown n_color")
            plt.gca().xaxis.set_major_locator(plt.NullLocator())  # disable ticks
            plt.gca().yaxis.set_major_locator(plt.NullLocator())
            count = count + 1
    if saveable:
        plt.savefig(name+'.pdf', format='pdf')
    else:
        plt.draw()
        plt.pause(second)
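# ---------------------------------------------------------------------------
# Editor's smoke-test sketch for images2d() (hedged: names and the random
# CIFAR-like batch are illustrative assumptions).
def _demo_images2d():
    """Tile 16 random 32x32 RGB images -> 'images_demo.pdf'."""
    random_images = np.random.rand(16, 32, 32, 3)  # (n_images, height, width, channels), values in [0, 1]
    images2d(random_images, saveable=True, name='images_demo')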
def tsne_embedding(embeddings, reverse_dictionary, plot_only=500,
                   second=5, saveable=False, name='tsne', fig_idx=9862):
    """Visualize the embeddings by using t-SNE.

    Parameters
    ----------
    embeddings : a matrix
        The embedding matrix.
    reverse_dictionary : a dictionary
        id_to_word, mapping id to unique word.
    plot_only : int
        The number of examples to plot; chooses the most common words.
    second : int
        The display second(s) for the image(s), if saveable is False.
    saveable : boolean
        Save or plot the figure.
    name : a string
        A name to save the image, if saveable is True.
    fig_idx : int
        matplotlib figure index.

    Examples
    --------
    >>> see 'tutorial_word2vec_basic.py'
    >>> final_embeddings = normalized_embeddings.eval()
    >>> tl.visualize.tsne_embedding(final_embeddings, reverse_dictionary,
    ...                   plot_only=500, second=5, saveable=False, name='tsne')
    """
    def plot_with_labels(low_dim_embs, labels, figsize=(18, 18), second=5,
                         saveable=True, name='tsne', fig_idx=9862):
        assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
        if saveable is False:
            plt.ion()
        plt.figure(fig_idx, figsize=figsize)  # figsize in inches
        for i, label in enumerate(labels):
            x, y = low_dim_embs[i, :]
            plt.scatter(x, y)
            plt.annotate(label,
                         xy=(x, y),
                         xytext=(5, 2),
                         textcoords='offset points',
                         ha='right',
                         va='bottom')
        if saveable:
            plt.savefig(name+'.pdf', format='pdf')
        else:
            plt.draw()
            plt.pause(second)

    try:
        from sklearn.manifold import TSNE
        import matplotlib.pyplot as plt
        from six.moves import xrange

        tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
        # plot_only = 500
        low_dim_embs = tsne.fit_transform(embeddings[:plot_only, :])
        labels = [reverse_dictionary[i] for i in xrange(plot_only)]
        plot_with_labels(low_dim_embs, labels, second=second, saveable=saveable,
                         name=name, fig_idx=fig_idx)
    except ImportError:
        print("Please install sklearn and matplotlib to visualize embeddings.")
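# ---------------------------------------------------------------------------
# Editor's smoke-test sketch for tsne_embedding() (hedged: the random 128-d
# "embeddings" and fake vocabulary are illustrative assumptions; real inputs
# come from a word2vec-style model as in the docstring example).
def _demo_tsne_embedding():
    """Project 100 random embeddings to 2-D and save 'tsne_demo.pdf'."""
    fake_embeddings = np.random.randn(600, 128)              # 600 "words", 128-d each
    fake_dictionary = {i: 'word%d' % i for i in range(600)}  # id -> word
    tsne_embedding(fake_embeddings, fake_dictionary, plot_only=100,
                   saveable=True, name='tsne_demo')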
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
#! /usr/bin/python
# -*- coding: utf8 -*-


"""
TensorLayer implementation of Google's "Show and Tell: A Neural Image Caption Generator".

Before starting, you need to download the inception_v3 ckpt model
and the MSCOCO data, following this link:
https://github.com/tensorflow/models/tree/master/im2txt

Paper: http://arxiv.org/abs/1411.4555
"""

import tensorflow as tf
import tensorlayer as tl
import time
import numpy as np
from buildmodel import *

# DIR = "/home/haodong/Workspace/image_captioning"
DIR = '/home/lei/Documents/Workspace/models/research/im2txt/im2txt'

## DIR =========================================================================
# Directory containing preprocessed MSCOCO data.
MSCOCO_DIR = DIR + "/data/mscoco"
# MSCOCO_DIR = "/home/haodong/Workspace/image_captioning/data/mscoco"
# Inception v3 checkpoint file.
INCEPTION_CHECKPOINT = DIR + "/data/inception_v3.ckpt"
# Directory to save the model.
MODEL_DIR = DIR + "/model"

# File pattern of sharded TFRecord input files.
input_file_pattern = MSCOCO_DIR + "/train-?????-of-00256"
# Path to a pretrained inception_v3 model: a file containing an Inception v3
# checkpoint to initialize the variables of the Inception model. Must be
# provided when starting training for the first time.
inception_checkpoint_file = INCEPTION_CHECKPOINT
# Directory for saving and loading model checkpoints.
train_dir = MODEL_DIR + "/train"
# Whether to train the Inception submodel variables. If True, fine-tune the Inception v3 model.
train_inception = False
# Number of training steps.
number_of_steps = 1000000
# Frequency at which loss and global step are logged.
log_every_n_steps = 1
# Build the model.
mode = "train"
assert mode in ["train", "eval", "inference"]


## Train Config ================= Don't Change =================================
# Number of examples per epoch of training data.
num_examples_per_epoch = 586363
# Optimizer for training the model.
optimizer = "SGD"
# Learning rate for the initial phase of training.
initial_learning_rate = 2.0
learning_rate_decay_factor = 0.5
num_epochs_per_decay = 8.0
# Learning rate when fine-tuning the Inception v3 parameters.
train_inception_learning_rate = 0.0005
# If not None, clip gradients to this value.
clip_gradients = 5.0
# How many model checkpoints to keep.
max_checkpoints_to_keep = 5

tf.logging.set_verbosity(tf.logging.INFO)  # Enable tf.logging

## =============================================================================
# Create the training directory (for saving and loading model checkpoints) if it does not exist.
if not tf.gfile.IsDirectory(train_dir):
    tf.logging.info("Creating training directory: %s", train_dir)
    tf.gfile.MakeDirs(train_dir)
# Alternatively, you can use os:
# if not os.path.exists(train_dir):
#     print("Creating training directory: %s" % train_dir)
#     os.makedirs(train_dir)
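# ---------------------------------------------------------------------------
# Editor's sketch (hedged, not in the original script): a quick sanity check
# that the 256 sharded TFRecords produced by data/build_mscoco_data.py are
# actually on disk before the input queue tries to read them. Only names
# defined above and the standard tf.gfile.Glob are used here.
_found_shards = tf.gfile.Glob(input_file_pattern)
tf.logging.info("Found %d of 256 training TFRecord shards under %s",
                len(_found_shards), MSCOCO_DIR)
# ---------------------------------------------------------------------------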
# Build the TensorFlow graph. ==================================================
g = tf.Graph()
with g.as_default():
    # with tf.device('/cpu:0'):
    # sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))  # unused: an InteractiveSession is created below
    print("tl : Build Show and Tell Model")
    images, input_seqs, target_seqs, input_mask = Build_Inputs(mode, input_file_pattern)
    # ## Example of reading the data
    # from im2txt.inference_utils import vocabulary
    # # vocab = vocabulary.Vocabulary(FLAGS.vocab_file)
    # vocab = vocabulary.Vocabulary('/home/haodong/Workspace/image_captioning/data/mscoco/word_counts.txt')
    # print('vocab:', [vocab.id_to_word(w) for w in range(100)])
    # sess = tf.Session()    # tf.InteractiveSession()
    # sess.run(tf.initialize_all_variables())
    # with tf.Session() as sess:
    #     sess.run(tf.initialize_all_variables())
    #     coord = tf.train.Coordinator()
    #     threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    #     for i in range(3):    # number of mini-batches (steps)
    #         print("Step %d" % i)
    #         img_val, caps, tar, mask = sess.run([images, input_seqs, target_seqs, input_mask])
    #         print(img_val.shape, caps.shape, tar.shape, mask.shape)
    #         for i in range(len(caps)):    # print every sentence in a batch; note the length is dynamic!
    #             sentence = [vocab.id_to_word(id) for id in caps[i]]
    #             print("input_seqs: " + " ".join(sentence))
    #             sentence = [vocab.id_to_word(id) for id in tar[i]]
    #             print("target_seqs: " + " ".join(sentence))
    #             print("input_mask: %s" % mask[i])
    #     coord.request_stop()
    #     coord.join(threads)
    #     sess.close()
    # # ((32, 299, 299, 3), (32, 18), (32, 18), (32, 18))
    # # input_seqs: a figurine with a plastic witches head is standing in front of a computer keyboard . a
    # # target_seqs: a figurine with a plastic witches head is standing in front of a computer keyboard . a
    # # input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0]
    # exit()
    # ## End of example of reading the data
    # with tf.device('/gpu:0'):
    net_image_embeddings = Build_Image_Embeddings(mode, images, train_inception)
    net_seq_embeddings = Build_Seq_Embeddings(input_seqs)
    total_loss, _, _, network = Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, input_mask)

    network.print_layers()

    tvar = tf.all_variables()  # or tf.trainable_variables()
    for idx, v in enumerate(tvar):
        print("  var {:3}: {:15}   {}".format(idx, str(v.get_shape()), v.name))

    # Set up the function to restore Inception variables from checkpoint. setup_inception_initializer()
    inception_variables = tf.get_collection(
        tf.GraphKeys.VARIABLES, scope="InceptionV3")

    # Set up the global step Tensor. setup_global_step()
    print("tl : Sets up the Global Step")
    global_step = tf.Variable(
        initial_value=0,
        dtype=tf.int32,
        name="global_step",
        trainable=False,
        collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.VARIABLES])

    # Set up the learning rate.
    learning_rate_decay_fn = None
    if train_inception:
        # when fine-tuning, use a small constant learning rate
        learning_rate = tf.constant(train_inception_learning_rate)
    else:
        # when not updating inception_v3, use the initial rate with staircase decay
        learning_rate = tf.constant(initial_learning_rate)
        if learning_rate_decay_factor > 0:
            num_batches_per_epoch = (num_examples_per_epoch / batch_size)
            decay_steps = int(num_batches_per_epoch * num_epochs_per_decay)
            def _learning_rate_decay_fn(learning_rate, global_step):
                return tf.train.exponential_decay(
                    learning_rate,
                    global_step,
                    decay_steps=decay_steps,
                    decay_rate=learning_rate_decay_factor,
                    staircase=True)
            learning_rate_decay_fn = _learning_rate_decay_fn
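    # -----------------------------------------------------------------------
    # Editor's sketch of what the staircase schedule above computes, in plain
    # Python (illustration only; `_staircase_lr` is not part of the script):
    #   lr(step) = initial_learning_rate * decay_factor ** floor(step / decay_steps)
    # Assuming batch_size == 32 (the value comes from buildmodel.py via the
    # star import; check your copy), decay_steps = int(586363 / 32 * 8.0) =
    # 146590, so the rate halves roughly every 146,590 steps: 2.0 -> 1.0 -> ...
    # def _staircase_lr(step):
    #     return initial_learning_rate * learning_rate_decay_factor ** (step // decay_steps)
    # -----------------------------------------------------------------------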
    # with tf.device('/gpu:0'):
    # Set up the training ops.
    train_op = tf.contrib.layers.optimize_loss(
        loss=total_loss,
        global_step=global_step,
        learning_rate=learning_rate,
        optimizer=optimizer,
        clip_gradients=clip_gradients,
        learning_rate_decay_fn=learning_rate_decay_fn)

    sess = tf.InteractiveSession()
    sess.run(tf.initialize_all_variables())
    if mode != "inference":
        print("tl : Restore InceptionV3 model from: %s" % inception_checkpoint_file)
        saver = tf.train.Saver(inception_variables)
        saver.restore(sess, inception_checkpoint_file)
        print("tl : Restore the latest ckpt model from: %s" % train_dir)
        try:
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(train_dir))  # train_dir+"/model.ckpt-960000")
        except Exception:
            print("  No ckpt found")

    # Set up the Saver for saving and restoring model checkpoints.
    saver = tf.train.Saver(max_to_keep=max_checkpoints_to_keep)

    print('Start training')  # the 1st epoch will take a while
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for step in range(sess.run(global_step), number_of_steps+1):
        start_time = time.time()
        try:
            loss, _ = sess.run([total_loss, train_op])
            print("step %d: loss = %.4f (%.2f sec)" % (step, loss, time.time() - start_time))
        except Exception:
            # e.g. the input queue ran dry or the run was interrupted
            coord.request_stop()
            coord.join(threads)
            exit()
        if (step % 10000) == 0 and step != 0:
            # save_path = saver.save(sess, MODEL_DIR+"/train/model.ckpt-"+str(step))
            save_path = saver.save(sess, MODEL_DIR+"/train/model.ckpt", global_step=step)
            tl.files.save_npz(network.all_params, name=MODEL_DIR+'/train/model_image_caption.npz')
    coord.request_stop()
    coord.join(threads)
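# ---------------------------------------------------------------------------
# Editor's usage sketch (hedged): reloading the npz parameters saved above into
# a freshly rebuilt graph, e.g. from run_inference.py. It assumes the bundled
# TensorLayer's `tl.files.load_npz` / `tl.files.assign_params` helpers behave
# as in tensorlayer/files.py -- verify against your copy -- and that `network`
# and `sess` are constructed exactly as in this script.
# params = tl.files.load_npz(name=MODEL_DIR + '/train/model_image_caption.npz')
# tl.files.assign_params(sess, params, network)
--------------------------------------------------------------------------------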