from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import LearningRateSchedule
from tensorflow.keras.metrics import Mean
from tensorflow import data, train, math, reduce_sum, cast, equal, argmax, float32, GradientTape, function
from keras.losses import sparse_categorical_crossentropy
from model import TransformerModel
from prepare_dataset import PrepareDataset
from time import time
from pickle import dump
# Define the model parameters
h = 8 # Number of self-attention heads
d_k = 64 # Dimensionality of the linearly projected queries and keys
d_v = 64 # Dimensionality of the linearly projected values
d_model = 512 # Dimensionality of model layers' outputs
d_ff = 2048 # Dimensionality of the inner fully connected layer
n = 6 # Number of layers in the encoder stack
# Define the training parameters
epochs = 20
batch_size = 64
beta_1 = 0.9
beta_2 = 0.98
epsilon = 1e-9
dropout_rate = 0.1
# Implementing a learning rate scheduler
class LRScheduler(LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000, **kwargs):
        super(LRScheduler, self).__init__(**kwargs)
        self.d_model = cast(d_model, float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step_num):
        # Cast the step number to float32 so it can be raised to a fractional power
        step_num = cast(step_num, float32)

        # Linearly increasing the learning rate for the first warmup_steps, and decreasing it thereafter
        arg1 = step_num ** -0.5
        arg2 = step_num * (self.warmup_steps ** -1.5)

        return (self.d_model ** -0.5) * math.minimum(arg1, arg2)
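
# The schedule above computes the learning rate formula from "Attention Is All You Need":
#     lrate = d_model^(-0.5) * min(step_num^(-0.5), step_num * warmup_steps^(-1.5))
# i.e. a linear warm-up over the first warmup_steps training steps, followed by a decay
# proportional to the inverse square root of the step number.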
# Instantiate an Adam optimizer
optimizer = Adam(LRScheduler(d_model), beta_1, beta_2, epsilon)
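
# Optional sanity check: the schedule can be evaluated directly, e.g. LRScheduler(d_model)(4000.0);
# for d_model = 512 this is roughly 7e-4, the peak learning rate reached at the end of warm-up.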
# Prepare the training dataset
dataset = PrepareDataset()
trainX, trainY, valX, valY, train_orig, val_orig, enc_seq_length, dec_seq_length, enc_vocab_size, dec_vocab_size = dataset('english-german.pkl')
print(enc_seq_length, dec_seq_length, enc_vocab_size, dec_vocab_size)
# Prepare the training dataset batches
train_dataset = data.Dataset.from_tensor_slices((trainX, trainY))
train_dataset = train_dataset.batch(batch_size)
# Prepare the validation dataset batches
val_dataset = data.Dataset.from_tensor_slices((valX, valY))
val_dataset = val_dataset.batch(batch_size)
# Create model
training_model = TransformerModel(enc_vocab_size, dec_vocab_size, enc_seq_length, dec_seq_length, h, d_k, d_v, d_model, d_ff, n, dropout_rate)
# Defining the loss function
def loss_fcn(target, prediction):
    # Create a mask so that the zero padding values are not included in the computation of loss
    padding_mask = math.logical_not(equal(target, 0))
    padding_mask = cast(padding_mask, float32)

    # Compute a sparse categorical cross-entropy loss on the unmasked values
    loss = sparse_categorical_crossentropy(target, prediction, from_logits=True) * padding_mask

    # Compute the mean loss over the unmasked values
    return reduce_sum(loss) / reduce_sum(padding_mask)
# Defining the accuracy function
def accuracy_fcn(target, prediction):
    # Create a mask so that the zero padding values are not included in the computation of accuracy
    padding_mask = math.logical_not(equal(target, 0))

    # Find equal prediction and target values, and apply the padding mask
    accuracy = equal(target, argmax(prediction, axis=2))
    accuracy = math.logical_and(padding_mask, accuracy)

    # Cast the True/False values to 32-bit-precision floating-point numbers
    padding_mask = cast(padding_mask, float32)
    accuracy = cast(accuracy, float32)

    # Compute the mean accuracy over the unmasked values
    return reduce_sum(accuracy) / reduce_sum(padding_mask)
# Include metrics monitoring
train_loss = Mean(name='train_loss')
train_accuracy = Mean(name='train_accuracy')
val_loss = Mean(name='val_loss')

# Create a checkpoint object and manager to manage multiple checkpoints
ckpt = train.Checkpoint(model=training_model, optimizer=optimizer)
ckpt_manager = train.CheckpointManager(ckpt, "./checkpoints", max_to_keep=None)
# Initialise dictionaries to store the training and validation losses
train_loss_dict = {}
val_loss_dict = {}
# Speeding up the training process
@function
def train_step(encoder_input, decoder_input, decoder_output):
    with GradientTape() as tape:
        # Run the forward pass of the model to generate a prediction
        prediction = training_model(encoder_input, decoder_input, training=True)

        # Compute the training loss
        loss = loss_fcn(decoder_output, prediction)

        # Compute the training accuracy
        accuracy = accuracy_fcn(decoder_output, prediction)

    # Retrieve gradients of the trainable variables with respect to the training loss
    gradients = tape.gradient(loss, training_model.trainable_weights)

    # Update the values of the trainable variables by gradient descent
    optimizer.apply_gradients(zip(gradients, training_model.trainable_weights))

    train_loss(loss)
    train_accuracy(accuracy)
for epoch in range(epochs):
    train_loss.reset_states()
    train_accuracy.reset_states()
    val_loss.reset_states()

    print("\nStart of epoch %d" % (epoch + 1))

    start_time = time()

    # Iterate over the dataset batches
    for step, (train_batchX, train_batchY) in enumerate(train_dataset):
        # Define the encoder and decoder inputs, and the decoder output
        encoder_input = train_batchX[:, 1:]
        decoder_input = train_batchY[:, :-1]
        decoder_output = train_batchY[:, 1:]

        train_step(encoder_input, decoder_input, decoder_output)

        if step % 50 == 0:
            print(f'Epoch {epoch + 1} Step {step} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')
    # Run a validation step after every epoch of training
    for val_batchX, val_batchY in val_dataset:
        # Define the encoder and decoder inputs, and the decoder output
        encoder_input = val_batchX[:, 1:]
        decoder_input = val_batchY[:, :-1]
        decoder_output = val_batchY[:, 1:]

        # Generate a prediction
        prediction = training_model(encoder_input, decoder_input, training=False)

        # Compute the validation loss
        loss = loss_fcn(decoder_output, prediction)
        val_loss(loss)

    # Print the epoch number and the accuracy and loss values at the end of every epoch
    print("Epoch %d: Training Loss %.4f, Training Accuracy %.4f, Validation Loss %.4f" % (epoch + 1, train_loss.result(), train_accuracy.result(), val_loss.result()))
    # Save a checkpoint after every epoch
    if (epoch + 1) % 1 == 0:
        save_path = ckpt_manager.save()
        print("Saved checkpoint at epoch %d" % (epoch + 1))

        # Save the trained model weights
        training_model.save_weights("weights/wghts" + str(epoch + 1) + ".ckpt")

        train_loss_dict[epoch] = train_loss.result()
        val_loss_dict[epoch] = val_loss.result()
# Save the training loss values
with open('./train_loss.pkl', 'wb') as file:
    dump(train_loss_dict, file)

# Save the validation loss values
with open('./val_loss.pkl', 'wb') as file:
    dump(val_loss_dict, file)

print("Total time taken: %.2fs" % (time() - start_time))
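
# The pickled loss dictionaries can later be reloaded for inspection or plotting, e.g.:
#
#     from pickle import load
#
#     with open('./train_loss.pkl', 'rb') as file:
#         train_loss_dict = load(file)
#     with open('./val_loss.pkl', 'rb') as file:
#         val_loss_dict = load(file)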