add logging

peto184 · Mar 15, 2019 · 0fc0780 · 0fc0780
1 parent ec7cfe5
commit 0fc0780
Show file tree

Hide file tree

Showing 5 changed files with 106 additions and 8 deletions.
diff --git a/data.py b/data.py
@@ -25,7 +25,7 @@ def __init__(self, args):
 
         self.use_char_embeds = args.type == 'char' 
 
-        self.train = self.tokenize(os.path.join(args.data, 'train.json'), use_char_embeds=self.use_char_embeds)
+        self.train = self.tokenize(os.path.join(args.data, 'dataset.json'), use_char_embeds=self.use_char_embeds)
         self.valid = self.tokenize(os.path.join(args.data, 'valid.json'), use_char_embeds=self.use_char_embeds)
 
     def tokenize(self, path, use_char_embeds=False):
@@ -34,7 +34,6 @@ def tokenize(self, path, use_char_embeds=False):
         data = load_data(path)
         data = ' '.join(data['reviewText'])
 
-
         tokens = 0
         if use_char_embeds:
             data = list(data)

diff --git a/generate.py b/generate.py
@@ -7,7 +7,7 @@
 # Model parameters.
 parser.add_argument('--data', type=str, default='./data/',
                     help='location of the data corpus')
-parser.add_argument('--checkpoint', type=str, default='./output/model.pt',
+parser.add_argument('--checkpoint', type=str, default='./models/m_char_10.pkl',
                     help='model checkpoint to use')
 parser.add_argument('--outf', type=str, default='./output/generated.txt',
                     help='output file for generated text')
@@ -33,14 +33,18 @@
 if args.temperature < 1e-3:
     parser.error("--temperature has to be greater or equal 1e-3")
 
+print(f'Loading model {args.checkpoint}')
 with open(args.checkpoint, 'rb') as f:
     model = torch.load(f).to(device)
-
 model.eval()
+
+print(f'Loading the dataset')
 corpus = data.Corpus(args)
 ntokens = len(corpus.dictionary)
 hidden = model.init_hidden(1)
 
+print(corpus.describe())
+
 input = torch.randint(ntokens, (1, 1), dtype=torch.long).to(device)
 
 with open(args.outf, 'w') as outf:

diff --git a/inference.py b/inference.py
@@ -0,0 +1,30 @@
+import torch
+from util import repackage_hidden, get_batch
+
+temperature = 1.0  # temperature - higher will increase diversity
+
+
+def inference(args, model, corpus, device, length=100):
+    """Sample ``length`` elements from ``model`` and return them as one string.
+
+    Sampling starts from a uniformly random element index and feeds every
+    sampled element back in as the next input. The module-level
+    ``temperature`` scales the model output before sampling.
+
+    Args:
+        args: namespace whose ``type`` attribute selects the layout: ``'word'``
+            separates elements with spaces and wraps every 20 elements; any
+            other value concatenates elements and wraps every 80.
+        model: object providing ``eval()``, ``init_hidden(batch_size)`` and
+            ``model(input, hidden) -> (output, hidden)``.
+        corpus: object whose ``dictionary`` supports ``len()`` and exposes
+            ``idx2element`` for index -> element lookup.
+        device: torch device the input tensor is moved to.
+        length: number of elements to sample (defaults to 100).
+
+    Returns:
+        The generated text.
+
+    Note:
+        The model is left in evaluation mode when this function returns.
+    """
+    # Turn on evaluation mode which disables dropout.
+    model.eval()
+
+    hidden = model.init_hidden(1)
+    current = torch.randint(len(corpus.dictionary), (1, 1),
+                            dtype=torch.long).to(device)
+
+    if args.type == 'word':
+        separator, per_line = ' ', 20
+    else:
+        separator, per_line = '', 80
+
+    # Collect the pieces and join once instead of growing a string with +=.
+    pieces = []
+
+    with torch.no_grad():  # no tracking history
+        for i in range(length):
+            output, hidden = model(current, hidden)
+            element_weights = output.squeeze().div(temperature).exp().cpu()
+            # .item() yields a plain int, so the lookup below works whether
+            # idx2element is a list or a dict keyed by int.
+            element_idx = torch.multinomial(element_weights, 1)[0].item()
+            current.fill_(element_idx)
+
+            pieces.append(corpus.dictionary.idx2element[element_idx])
+            # Break the line after every `per_line`-th element.
+            pieces.append('\n' if i % per_line == per_line - 1 else separator)
+
+    return ''.join(pieces)
diff --git a/main.py b/main.py
@@ -53,7 +53,10 @@
 
 device = torch.device("cuda" if args.cuda else "cpu")
 print(f"Using {device} to train.")
+
+print('Loading corpus.')
 corpus = data.Corpus(args)
+
 print(corpus.describe())
 ntokens = len(corpus.dictionary)
 model = RNNModel(ntokens, args.emsize, args.nhid, args.nlayers, dropout=args.dropout).to(device)

diff --git a/train.py b/train.py
@@ -1,22 +1,45 @@
 import torch
 import time
 import math
+import os
 
 from evaluate import evaluate
+from inference import inference
 from util import get_batch, batchify, repackage_hidden
 
-def _train_epoch(args, epoch, model, train_data, corpus, device, lr, criterion):
+import tensorflow as tf
+import numpy as np
 
-    # Turn on training mode which enables dropout.
-    model.train()
+# Timestamp is taken once, when this module is imported, and names the
+# TensorBoard log folder for the run.
+# NOTE(review): the ':' characters in the folder name are not valid in
+# Windows paths - confirm the target platforms.
+_time = time.strftime('%Y_%m_%d__%H:%M:%S')
+LOG_FOLDER = f'./log/{_time}'
+# Module-level writer passed to inject_summary / inject_summary_text by
+# _train_epoch and train.
+summary_writer = tf.summary.FileWriter(LOG_FOLDER)  
+
+def inject_summary(summary_writer, tag, value, step):
+    """Write the scalar ``value`` under ``tag`` at ``step`` to ``summary_writer``."""
+    scalar = tf.Summary.Value(tag=tag, simple_value=value)
+    event = tf.Summary(value=[scalar])
+    summary_writer.add_summary(event, global_step=step)
+
+def inject_summary_text(summary_writer, tag, value, step):
+    """Write the string ``value`` as a text summary under ``tag`` at ``step``.
+
+    Args:
+        summary_writer: writer exposing ``add_summary(summary, global_step=...)``.
+        tag: name of the text summary.
+        value: the text to record.
+        step: global step the entry is recorded at.
+    """
+    # Build the op in a throwaway graph so repeated calls do not keep adding
+    # constants and summary ops to the process-wide default graph.
+    with tf.Graph().as_default():
+        summary = tf.summary.text(tag, tf.constant([value]))
+
+        with tf.Session() as sess:
+            serialized = sess.run(summary)
+
+    # Forward the step so the entry is recorded at the caller's position.
+    summary_writer.add_summary(serialized, global_step=step)
 
+
+def _train_epoch(args, epoch, model, train_data, corpus, device, lr, criterion):
     total_loss = 0.
     start_time = time.time()
     ntokens = len(corpus.dictionary)
 
     hidden = model.init_hidden(args.batch_size)
+
+    model.train()
     for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
+
         data, targets = get_batch(args.bptt, train_data, i)
+
         # Starting each batch, we detach the hidden state from how it was previously produced.
         # If we didn't, the model would try backpropagating all the way to start of the dataset.
         hidden = repackage_hidden(hidden)
@@ -42,14 +65,31 @@ def _train_epoch(args, epoch, model, train_data, corpus, device, lr, criterion):
             total_loss = 0
             start_time = time.time()
 
+            # Log to tensorboard
+            info = {
+                f'training/{epoch}loss/loss': cur_loss,
+                f'training/{epoch}/loss_exp': math.exp(cur_loss),
+                'training/lr' : lr,
+            }
+
+            for tag, value in info.items():
+                inject_summary(summary_writer, tag, value, i)
+
+            summary_writer.flush()
+
+
 def train(args, model, corpus, device, criterion):
     # At any point you can hit Ctrl + C to break out of training early.
     lr = args.lr
-    best_val_loss = None
+    best_val_loss = None  
 
     train_data = batchify(corpus.train, args.batch_size).to(device)
     valid_data = batchify(corpus.valid, args.batch_size).to(device)
 
+    if not os.path.exists(LOG_FOLDER):
+        print(f'Creatnig folder {LOG_FOLDER}')
+        os.makedirs(LOG_FOLDER)
+
     try:
         for epoch in range(1, args.epochs+1):
             epoch_start_time = time.time()
@@ -69,6 +109,28 @@ def train(args, model, corpus, device, criterion):
             else:
                 # Anneal the learning rate if no improvement has been seen in the validation dataset.
                 lr /= 4.0
+
+            # Persist after each epoch
+            with open(f"./models/m_{args.type}_{epoch}.pkl", 'wb') as f:
+                torch.save(model, f)
+
+            # Inference a text after each epoch
+            inference_text = inference(args, model, corpus, device)
+            print(f'Generated text: {inference_text}')
+            inject_summary_text(summary_writer, f'Inference_{epoch}', inference_text, epoch)
+
+            # Log to tensorboard
+            info = {
+                'validation/loss/val_loss': val_loss,
+                'validation/loss/val_loss_exp': math.exp(val_loss),
+            }
+
+            for tag, value in info.items():
+                inject_summary(summary_writer, tag, value, epoch)
+
+            summary_writer.flush()
+
+
     except KeyboardInterrupt:
         print('-' * 89)
         print('Exiting from training early')