from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import LearningRateSchedule
from tensorflow.keras.metrics import Mean
from tensorflow import data, train, math, reduce_sum, cast, equal, argmax, float32, GradientTape, function
from keras.losses import sparse_categorical_crossentropy
from model import TransformerModel
from prepare_dataset import PrepareDataset
from time import time
from pickle import dump
# Define the model parameters
h = 8 # Number of self-attention heads
d_k = 64 # Dimensionality of the linearly projected queries and keys
d_v = 64 # Dimensionality of the linearly projected values
d_model = 512 # Dimensionality of model layers' outputs
d_ff = 2048 # Dimensionality of the inner fully connected layer
n = 6 # Number of layers in the encoder stack
# Define the training parameters
epochs = 20
batch_size = 64
beta_1 = 0.9
beta_2 = 0.98
epsilon = 1e-9
dropout_rate = 0.1
# Implementing a learning rate scheduler
class LRScheduler(LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000, **kwargs):
        super(LRScheduler, self).__init__(**kwargs)
        self.d_model = cast(d_model, float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step_num):
        # Cast the step number to float32 so it can be raised to a fractional power
        step_num = cast(step_num, float32)

        # Linearly increasing the learning rate for the first warmup_steps, and decreasing it thereafter
        arg1 = step_num ** -0.5
        arg2 = step_num * (self.warmup_steps ** -1.5)

        return (self.d_model ** -0.5) * math.minimum(arg1, arg2)
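
# The schedule above computes the learning rate formula from "Attention Is All You Need":
#     lrate = d_model^(-0.5) * min(step_num^(-0.5), step_num * warmup_steps^(-1.5))
# i.e. a linear warm-up over the first warmup_steps training steps, followed by a decay
# proportional to the inverse square root of the step number.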
# Instantiate an Adam optimizer
optimizer = Adam(LRScheduler(d_model), beta_1, beta_2, epsilon)
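
# Optional sanity check: the schedule can be evaluated directly, e.g. LRScheduler(d_model)(4000.0);
# for d_model = 512 this is roughly 7e-4, the peak learning rate reached at the end of warm-up.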
# Prepare the training dataset
dataset = PrepareDataset()
trainX, trainY, valX, valY, train_orig, val_orig, enc_seq_length, dec_seq_length, enc_vocab_size, dec_vocab_size = dataset('english-german.pkl')
print(enc_seq_length, dec_seq_length, enc_vocab_size, dec_vocab_size)
# Prepare the training dataset batches
train_dataset = data.Dataset.from_tensor_slices((trainX, trainY))
train_dataset = train_dataset.batch(batch_size)
# Prepare the validation dataset batches
val_dataset = data.Dataset.from_tensor_slices((valX, valY))
val_dataset = val_dataset.batch(batch_size)
# Create model
training_model = TransformerModel(enc_vocab_size, dec_vocab_size, enc_seq_length, dec_seq_length, h, d_k, d_v, d_model, d_ff, n, dropout_rate)
# Defining the loss function
def loss_fcn(target, prediction):
    # Create a mask so that the zero padding values are not included in the computation of loss
    padding_mask = math.logical_not(equal(target, 0))
    padding_mask = cast(padding_mask, float32)

    # Compute a sparse categorical cross-entropy loss on the unmasked values
    loss = sparse_categorical_crossentropy(target, prediction, from_logits=True) * padding_mask

    # Compute the mean loss over the unmasked values
    return reduce_sum(loss) / reduce_sum(padding_mask)
# Defining the accuracy function
def accuracy_fcn(target, prediction):
    # Create a mask so that the zero padding values are not included in the computation of accuracy
    padding_mask = math.logical_not(equal(target, 0))

    # Find equal prediction and target values, and apply the padding mask
    accuracy = equal(target, argmax(prediction, axis=2))
    accuracy = math.logical_and(padding_mask, accuracy)

    # Cast the True/False values to 32-bit-precision floating-point numbers
    padding_mask = cast(padding_mask, float32)
    accuracy = cast(accuracy, float32)

    # Compute the mean accuracy over the unmasked values
    return reduce_sum(accuracy) / reduce_sum(padding_mask)
# Include metrics monitoring
train_loss = Mean(name='train_loss')
train_accuracy = Mean(name='train_accuracy')
val_loss = Mean(name='val_loss')

# Create a checkpoint object and manager to manage multiple checkpoints
ckpt = train.Checkpoint(model=training_model, optimizer=optimizer)
ckpt_manager = train.CheckpointManager(ckpt, "./checkpoints", max_to_keep=None)
# Initialise dictionaries to store the training and validation losses
train_loss_dict = {}
val_loss_dict = {}
# Speeding up the training process
@function
def train_step(encoder_input, decoder_input, decoder_output):
    with GradientTape() as tape:
        # Run the forward pass of the model to generate a prediction
        prediction = training_model(encoder_input, decoder_input, training=True)

        # Compute the training loss
        loss = loss_fcn(decoder_output, prediction)

        # Compute the training accuracy
        accuracy = accuracy_fcn(decoder_output, prediction)

    # Retrieve gradients of the trainable variables with respect to the training loss
    gradients = tape.gradient(loss, training_model.trainable_weights)

    # Update the values of the trainable variables by gradient descent
    optimizer.apply_gradients(zip(gradients, training_model.trainable_weights))

    train_loss(loss)
    train_accuracy(accuracy)
for epoch in range(epochs):
    train_loss.reset_states()
    train_accuracy.reset_states()
    val_loss.reset_states()

    print("\nStart of epoch %d" % (epoch + 1))

    start_time = time()

    # Iterate over the dataset batches
    for step, (train_batchX, train_batchY) in enumerate(train_dataset):
        # Define the encoder and decoder inputs, and the decoder output
        encoder_input = train_batchX[:, 1:]
        decoder_input = train_batchY[:, :-1]
        decoder_output = train_batchY[:, 1:]

        train_step(encoder_input, decoder_input, decoder_output)

        if step % 50 == 0:
            print(f'Epoch {epoch + 1} Step {step} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')
    # Run a validation step after every epoch of training
    for val_batchX, val_batchY in val_dataset:
        # Define the encoder and decoder inputs, and the decoder output
        encoder_input = val_batchX[:, 1:]
        decoder_input = val_batchY[:, :-1]
        decoder_output = val_batchY[:, 1:]

        # Generate a prediction
        prediction = training_model(encoder_input, decoder_input, training=False)

        # Compute the validation loss
        loss = loss_fcn(decoder_output, prediction)
        val_loss(loss)

    # Print the epoch number and the accuracy and loss values at the end of every epoch
    print("Epoch %d: Training Loss %.4f, Training Accuracy %.4f, Validation Loss %.4f" % (epoch + 1, train_loss.result(), train_accuracy.result(), val_loss.result()))
    # Save a checkpoint after every epoch
    if (epoch + 1) % 1 == 0:
        save_path = ckpt_manager.save()
        print("Saved checkpoint at epoch %d" % (epoch + 1))

        # Save the trained model weights
        training_model.save_weights("weights/wghts" + str(epoch + 1) + ".ckpt")

        train_loss_dict[epoch] = train_loss.result()
        val_loss_dict[epoch] = val_loss.result()
# Save the training loss values
with open('./train_loss.pkl', 'wb') as file:
    dump(train_loss_dict, file)

# Save the validation loss values
with open('./val_loss.pkl', 'wb') as file:
    dump(val_loss_dict, file)

print("Total time taken: %.2fs" % (time() - start_time))
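
# The pickled loss dictionaries can later be reloaded for inspection or plotting, e.g.:
#
#     from pickle import load
#
#     with open('./train_loss.pkl', 'rb') as file:
#         train_loss_dict = load(file)
#     with open('./val_loss.pkl', 'rb') as file:
#         val_loss_dict = load(file)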