My LSTM model below in TensorFlow runs without any errors, but the loss and accuracy values hover around the same levels throughout training, so the numbers reported after all the epochs are misleading.
My thoughts were/are:
- It might be a problem of feeding the same batch every time. I don't think this is the case, because the values are printed after every 10th epoch, and in between different batches of the data are being fed.
- It might be caused by the LSTM network being too simple. However, I don't believe this is the problem either, because even a small network should improve over time thanks to the optimization.
- Since the loss and accuracy stay around a fixed value, I might be calculating or printing them incorrectly, which would explain these numbers, although I can't find the error in the code (see the metrics sketch after this list for what I intend to compute).
- I suspect the issue comes from some typo or misunderstanding in my code, because otherwise I would expect the loss and accuracy to increase or decrease, even without a consistent pattern, rather than behave like this.
- Finally, if the code is correctly arranged, these values could come from a bad choice of hyperparameters. My next steps would be to increase the complexity of the LSTM network, lower the learning rate and increase the batch size (see the hyperparameter sketch after this list).
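To make the third point above concrete, this is what I intend the printed numbers to mean: the plain average of the per-batch losses and accuracies over one epoch. The helper below (epoch_metrics is a name I made up for this sketch) is just NumPy and is not part of my graph; the numeric values are made up:

    import numpy as np

    def epoch_metrics(batch_losses, batch_accuracies):
        # Average the per-batch scalars returned by sess.run() into per-epoch numbers.
        return float(np.mean(batch_losses)), float(np.mean(batch_accuracies))

    # Example with made-up per-batch values:
    loss, acc = epoch_metrics([1.09, 1.07, 1.08], [0.34, 0.36, 0.35])
    print("Epoch loss: {:.4f}, epoch accuracy: {:.4f}".format(loss, acc))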
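For the last point, these are the kinds of changes I have in mind, written against the TF 1.x API. The concrete values (1e-3, 512, 2 layers) are arbitrary guesses I would try, not tuned settings:

    import tensorflow as tf

    # Hypothetical settings I would try next (values are guesses, not tuned):
    LEARNING_RATE = 1e-3      # much lower than the current 0.1
    BATCH_SIZE = 512          # larger batches
    N_NEURONS_LSTM = 128
    N_LAYERS_LSTM = 2         # stack two LSTM layers instead of one

    # A deeper recurrent cell that would replace cellType in my graph:
    stacked_cell = tf.nn.rnn_cell.MultiRNNCell(
        [tf.nn.rnn_cell.LSTMCell(num_units=N_NEURONS_LSTM) for _ in range(N_LAYERS_LSTM)])

    # Adam with the lower learning rate:
    optimizer_type = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)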
As there is not much documentation (and given my limited experience) about combining the Dataset API with RNNs, different batches and different datasets, I may have done something wrong.
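For reference, this is the switching pattern I believe I am following, reduced to a self-contained toy example with random arrays. The shapes (11 timesteps, 74 features, 3 outputs) match my data; everything else here is made up:

    import numpy as np
    import tensorflow as tf

    # Toy data standing in for the real (samples, timesteps, features) arrays.
    x_train = np.random.rand(100, 11, 74).astype(np.float32)
    y_train = np.random.rand(100, 3).astype(np.float32)
    x_val = np.random.rand(20, 11, 74).astype(np.float32)
    y_val = np.random.rand(20, 3).astype(np.float32)

    x = tf.placeholder(tf.float32, shape=[None, 11, 74])
    y = tf.placeholder(tf.float32, shape=[None, 3])

    train_ds = tf.data.Dataset.from_tensor_slices((x, y)).batch(32).repeat()
    val_ds = tf.data.Dataset.from_tensor_slices((x, y)).batch(32)

    # Reinitializable iterator: one get_next() op, switched by running an init op.
    itr = tf.data.Iterator.from_structure(train_ds.output_types, train_ds.output_shapes)
    train_init = itr.make_initializer(train_ds)
    val_init = itr.make_initializer(val_ds)
    next_x, next_y = itr.get_next()

    with tf.Session() as sess:
        sess.run(train_init, feed_dict={x: x_train, y: y_train})
        first_batch = sess.run(next_x)
        second_batch = sess.run(next_x)                   # should be a different batch
        print(np.array_equal(first_batch, second_batch))  # expect False

        sess.run(val_init, feed_dict={x: x_val, y: y_val})
        val_batch = sess.run(next_x)
        print(val_batch.shape)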
I would appreciate any review or insight that could shed some light on this.
Many thanks for your time.
CODE:
import os
from datetime import datetime

import tensorflow as tf

'''All the data is numeric. The dataset is scaled using the StandardScaler from scikit-learn and has the following shapes:'''
#The 3D xt array has a shape of: (11, 69579, 74)
#The 3D xval array has a shape of: (11, 7732, 74)
#y shape is: (69579, 3)
#yval shape is: (7732, 3)

N_TIMESTEPS_X = xt.shape[0]    ## The stack number
BATCH_SIZE = 256
#N_OBSERVATIONS = xt.shape[1]
N_FEATURES = xt.shape[2]
N_OUTPUTS = yt.shape[1]
N_NEURONS_LSTM = 128           ## Number of units in the LSTMCell
N_EPOCHS = 600
LEARNING_RATE = 0.1

### Define the placeholders and gather the data.
xt = xt.transpose([1, 0, 2])
xval = xval.transpose([1, 0, 2])

train_data = (xt, yt)
validation_data = (xval, yval)

## We define placeholders as a trick so that we do not run into memory problems associated with feeding the data directly.
'''As an alternative, you can define the Dataset in terms of tf.placeholder() tensors, and feed the NumPy arrays when you initialize an Iterator over the dataset.'''
batch_size = tf.placeholder(tf.int64)
x = tf.placeholder(tf.float32, shape=[None, N_TIMESTEPS_X, N_FEATURES], name='XPlaceholder')
y = tf.placeholder(tf.float32, shape=[None, N_OUTPUTS], name='YPlaceholder')

# Creating the two different dataset objects.
train_dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(BATCH_SIZE).repeat()
val_dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(BATCH_SIZE)

# Creating the Iterator that permits switching between the two datasets.
itr = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
train_init_op = itr.make_initializer(train_dataset)
validation_init_op = itr.make_initializer(val_dataset)

next_features, next_labels = itr.get_next()

### Create the graph
cellType = tf.nn.rnn_cell.LSTMCell(num_units=N_NEURONS_LSTM, name='LSTMCell')

inputs = tf.unstack(next_features, axis=1)
'''inputs: A length T list of inputs, each a Tensor of shape [batch_size, input_size]'''
RNNOutputs, _ = tf.nn.static_rnn(cell=cellType, inputs=inputs, dtype=tf.float32)

out_weights = tf.get_variable("out_weights", shape=[N_NEURONS_LSTM, N_OUTPUTS], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
out_bias = tf.get_variable("out_bias", shape=[N_OUTPUTS], dtype=tf.float32,
                           initializer=tf.zeros_initializer())

predictionsLayer = tf.matmul(RNNOutputs[-1], out_weights) + out_bias

### Define the cost function that will be optimized.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=predictionsLayer, labels=next_labels,
                                                                 name='Softmax_plus_Cross_Entropy'))
optimizer_type = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE, name='AdamOptimizer')
optimizer = optimizer_type.minimize(cost)

### Model evaluation
correctPrediction = tf.equal(tf.argmax(predictionsLayer, 1), tf.argmax(next_labels, 1))
accuracy = tf.reduce_mean(tf.cast(correctPrediction, tf.float32))

N_BATCHES = train_data[0].shape[0] // BATCH_SIZE

## Saving variables so that we can restore them afterwards.
saver = tf.train.Saver()
save_dir = '/home/zmlaptop/Desktop/tfModels/{}_{}'.format(cellType.__class__.__name__,
                                                          datetime.now().strftime("%Y%m%d%H%M%S"))
os.mkdir(save_dir)

varDict = {'nTimeSteps': N_TIMESTEPS_X, 'BatchSize': BATCH_SIZE, 'nFeatures': N_FEATURES,
           'nNeuronsLSTM': N_NEURONS_LSTM, 'nEpochs': N_EPOCHS, 'learningRate': LEARNING_RATE,
           'optimizerType': optimizer_type.__class__.__name__}
varDicSavingTxt = save_dir + '/varDict.txt'
modelFilesDir = save_dir + '/modelFiles'
os.mkdir(modelFilesDir)
logDir = save_dir + '/TBoardLogs'
os.mkdir(logDir)

acc_summary = tf.summary.scalar('Accuracy', accuracy)
loss_summary = tf.summary.scalar('Cost_CrossEntropy', cost)
summary_merged = tf.summary.merge_all()

with open(varDicSavingTxt, 'w') as outfile:
    outfile.write(repr(varDict))

with tf.Session() as sess:
    tf.set_random_seed(2)
    sess.run(tf.global_variables_initializer())

    train_writer = tf.summary.FileWriter(logDir + '/train', sess.graph)
    validation_writer = tf.summary.FileWriter(logDir + '/validation')

    # initialise iterator with train data
    sess.run(train_init_op, feed_dict={x: train_data[0], y: train_data[1], batch_size: BATCH_SIZE})

    print('¡Training starts!')
    for epoch in range(N_EPOCHS):
        batchAccList = []
        tot_loss = 0

        for batch in range(N_BATCHES):
            optimizer_output, loss_value, summary, accBatch = sess.run(
                [optimizer, cost, summary_merged, accuracy],
                feed_dict={x: train_data[0], y: train_data[1], batch_size: BATCH_SIZE})
            tot_loss += loss_value
            batchAccList.append(accBatch)

            if batch % 10 == 0:
                train_writer.add_summary(summary, batch)

        epochAcc = tf.reduce_mean(batchAccList)
        epochAcc_num = sess.run(epochAcc, feed_dict={x: train_data[0], y: train_data[1], batch_size: BATCH_SIZE})

        if epoch % 10 == 0:
            print("Epoch: {}, Loss: {:.4f}, Accuracy: {}".format(epoch, tot_loss / N_BATCHES, epochAcc_num))

    # initialise iterator with validation data
    sess.run(validation_init_op, feed_dict={x: validation_data[0], y: validation_data[1],
                                            batch_size: len(validation_data[0])})

    valLoss, valAcc = sess.run([cost, accuracy],
                               feed_dict={x: train_data[0], y: train_data[1], batch_size: BATCH_SIZE})
    print('Validation Loss: {:4f}, Validation Accuracy: {}'.format(valLoss, valAcc))

    summary_val = sess.run(summary_merged, feed_dict={x: validation_data[0], y: validation_data[1],
                                                      batch_size: len(validation_data[0])})
    validation_writer.add_summary(summary_val)

    saver.save(sess, modelFilesDir)
This is the TensorBoard output for accuracy and loss (training stopped at around 250 epochs):