Update: If I disable the GPU using the following code, the differences become very small: on the order of 1e-8 instead of 8e-2. This seems like a GPU driver problem.
import tensorflow as tf
# Hide every GPU from TensorFlow so that all subsequent ops run on the CPU.
# Per the TensorFlow docs this has to happen before the runtime is
# initialized (i.e. before any op or model is created); otherwise
# set_visible_devices raises RuntimeError.
tf.config.set_visible_devices([], 'GPU')

# Sanity check: none of the devices that remain visible may be a GPU.
visible_devices = tf.config.get_visible_devices()
for device in visible_devices:
    assert device.device_type != 'GPU'
import tensorflow as tf
import numpy as np
import pandas as pd
# Build the model under test: inputs of shape (10, 5) feed an LSTM with 100
# units that returns its full output sequence (return_sequences=True),
# followed by a 2-unit Dense layer with a sigmoid activation.
input_shape = (10, 5)
model_tst = tf.keras.Sequential(
    [
        tf.keras.Input(shape=input_shape),
        tf.keras.layers.LSTM(100, return_sequences=True),
        tf.keras.layers.Dense(2, activation="sigmoid"),
    ]
)
model_tst.summary()

# Compile with Adam and binary cross-entropy computed on probabilities
# (from_logits=False, matching the sigmoid output); MSE is tracked as the
# only metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)
model_tst.compile(optimizer=optimizer, loss=loss, metrics=["mse"])
# Step-shaped input: 11 identical samples of shape (10, 5) that hold 1.0 for
# the first eight timesteps and jump to 99.0 for the last two.
random_input = np.full((11, 10, 5), 1.0)
random_input[:, 8:] = 99

# Run the same data through the model with two different batch sizes and keep
# only the first sample's (10, 2) output from each run.
preds_batch_1 = model_tst.predict(random_input, batch_size=1)
preds_batch_10 = model_tst.predict(random_input, batch_size=10)
random_output2 = preds_batch_1[0].reshape(10, 2)
random_output3 = preds_batch_10[0].reshape(10, 2)

# Element-wise difference between the two runs; transposed so that each
# column corresponds to one timestep. The inputs are identical, so any
# non-zero entry comes from the batch size alone.
diff2 = random_output3 - random_output2
pd.DataFrame(diff2).T