Stuck with an error in graph mode

Lakshmi_Narayana · May 21, 2023, 12:55pm

I am trying to fit an entire training loop including the epochs into graph mode and I keep getting this error:

in user code:

    File "C:\Users\naray\AppData\Roaming\JetBrains\PyCharm2022.3\scratches\scratch_10.py", line 69, in train  *
        for epoch in tf.range(0, epochs, dtype=tf.int8):

    TypeError: Failed to convert elements of SparseCategoricalAccuracy(name=sparse_categorical_accuracy,dtype=float32) to Tensor. Consider casting elements to a supported type. See https://www.tensorflow.org/api_docs/python/tf/dtypes for supported TF dtypes.

This is my own exercise, so I am pasting the code here:

import os

import tensorflow as tf
import tensorflow_datasets as tfds

import numpy as np

import time

start = time.time()
os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '1'
batch_size = tf.constant(64)
epochs = tf.constant(10)

train_data, info = tfds.load('fashion_mnist', split='train', with_info=True)
test_data = tfds.load('fashion_mnist', split='test')


def prepare_data(data):
    image = data['image']
    image = tf.reshape(image, (-1,))
    image = tf.cast(image, tf.float32)
    image = image / 255.0
    return image, data['label']


train_data = train_data.map(prepare_data)
train_data = train_data.shuffle(buffer_size=1024).batch(64)

test_data = test_data.map(prepare_data)
test_data = test_data.shuffle(buffer_size=1024).batch(64)


def basic_model():
    inputs = tf.keras.layers.Input(shape=(784,))
    x = tf.keras.layers.Dense(64, activation='relu')(inputs)
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    outputs = tf.keras.layers.Dense(10, activation='softmax')(x)

    return tf.keras.Model(inputs=inputs, outputs=outputs)


model = basic_model()

loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
train_acc_object = tf.keras.metrics.SparseCategoricalAccuracy()
val_acc_object = tf.keras.metrics.SparseCategoricalAccuracy()
train_losses = tf.TensorArray(dtype=tf.float32, size=batch_size, dynamic_size=True)
val_losses = tf.TensorArray(dtype=tf.float32, size=batch_size, dynamic_size=True)


def validate(test_data, model, val_losses, val_acc_object, loss_object):
    print('validation trace')
    for x, ytrue in test_data:
        ypred = model(x)
        loss_value = loss_object(ytrue, ypred)
        val_losses.write(val_losses.size() - 1, loss_value).mark_used()
        val_acc_object.update_state(ytrue, ypred)
    return val_losses, val_acc_object


@tf.function
def train(train_data, test_data, model, loss_object, optimizer,
          train_acc_object, val_acc_object, epochs,
          train_losses,
          val_losses):
    print('train trace', tf.range(0, epochs, dtype=tf.int8))
    for epoch in tf.range(0, epochs, dtype=tf.int8):
        tf.print('epoch: ', epoch)
        for x, ytrue in train_data:
            with tf.GradientTape() as tape:
                ypred = model(x)
                loss_value = loss_object(ytrue, ypred)
            grads = tape.gradient(loss_value, model.variables)
            train_losses.write(train_losses.size() - 1, loss_value).mark_used()
            # tf.print(tf.convert_to_tensor(grads).shape, ' ', tf.convert_to_tensor(model.variables).shape)
            optimizer.apply_gradients(zip(grads, model.variables))
            train_acc_object.update_state(ytrue, ypred)

        val_losses, val_acc_object = validate(test_data, model, val_losses, val_acc_object, loss_object)
        tf.print('train_loss: ', tf.math.reduce_mean(train_losses.stack()), ' train_acc: ', train_acc_object.result(),
                 ' val_loss: ', tf.math.reduce_mean(val_losses.stack()), ' val_accuracy: ', val_acc_object.result())
        train_acc_object.reset_state()
        val_acc_object.reset_state()

try:
    train(train_data=train_data, test_data=test_data, model=model, loss_object=loss_object, optimizer=optimizer, train_acc_object=train_acc_object, val_acc_object=val_acc_object, epochs=epochs, train_losses=train_losses, val_losses=val_losses)
except TypeError as e:
    print(tf.autograph.to_code(train.python_function))
    print(e)
print('time: ', time.time() - start)

Here is the graph generator:

def tf__train(train_data, test_data, model, loss_object, optimizer, train_acc_object, val_acc_object, epochs, train_losses, val_losses):
    with ag__.FunctionScope('train', 'fscope', ag__.ConversionOptions(recursive=True, user_requested=True, optional_features=(), internal_convert_user_code=True)) as fscope:
        ag__.ld(print)('train trace', ag__.converted_call(ag__.ld(tf).range, (0, ag__.ld(epochs)), dict(dtype=ag__.ld(tf).int8), fscope))

        def get_state_1():
            return (val_acc_object, val_losses)

        def set_state_1(vars_):
            nonlocal val_acc_object, val_losses
            (val_acc_object, val_losses) = vars_

        def loop_body_1(itr_1):
            nonlocal val_acc_object, val_losses
            epoch = itr_1
            ag__.converted_call(ag__.ld(tf).print, ('epoch: ', ag__.ld(epoch)), None, fscope)

            def get_state():
                return ()

            def set_state(block_vars):
                pass

            def loop_body(itr):
                (x, ytrue) = itr
                with ag__.ld(tf).GradientTape() as tape:
                    ypred = ag__.converted_call(ag__.ld(model), (ag__.ld(x),), None, fscope)
                    loss_value = ag__.converted_call(ag__.ld(loss_object), (ag__.ld(ytrue), ag__.ld(ypred)), None, fscope)
                grads = ag__.converted_call(ag__.ld(tape).gradient, (ag__.ld(loss_value), ag__.ld(model).variables), None, fscope)
                ag__.converted_call(ag__.converted_call(ag__.ld(train_losses).write, (ag__.converted_call(ag__.ld(train_losses).size, (), None, fscope) - 1, ag__.ld(loss_value)), None, fscope).mark_used, (), None, fscope)
                ag__.converted_call(ag__.ld(optimizer).apply_gradients, (ag__.converted_call(ag__.ld(zip), (ag__.ld(grads), ag__.ld(model).variables), None, fscope),), None, fscope)
                ag__.converted_call(ag__.ld(train_acc_object).update_state, (ag__.ld(ytrue), ag__.ld(ypred)), None, fscope)
            ag__.for_stmt(ag__.ld(train_data), None, loop_body, get_state, set_state, (), {'iterate_names': '(x, ytrue)'})
            (val_losses, val_acc_object) = ag__.converted_call(ag__.ld(validate), (ag__.ld(test_data), ag__.ld(model), ag__.ld(val_losses), ag__.ld(val_acc_object), ag__.ld(loss_object)), None, fscope)
            ag__.converted_call(ag__.ld(tf).print, ('train_loss: ', ag__.converted_call(ag__.ld(tf).math.reduce_mean, (ag__.converted_call(ag__.ld(train_losses).stack, (), None, fscope),), None, fscope), ' train_acc: ', ag__.converted_call(ag__.ld(train_acc_object).result, (), None, fscope), ' val_loss: ', ag__.converted_call(ag__.ld(tf).math.reduce_mean, (ag__.converted_call(ag__.ld(val_losses).stack, (), None, fscope),), None, fscope), ' val_accuracy: ', ag__.converted_call(ag__.ld(val_acc_object).result, (), None, fscope)), None, fscope)
        ypred = ag__.Undefined('ypred')
        tape = ag__.Undefined('tape')
        ytrue = ag__.Undefined('ytrue')
        grads = ag__.Undefined('grads')
        x = ag__.Undefined('x')
        epoch = ag__.Undefined('epoch')
        loss_value = ag__.Undefined('loss_value')
        ag__.for_stmt(ag__.converted_call(ag__.ld(tf).range, (0, ag__.ld(epochs)), dict(dtype=ag__.ld(tf).int8), fscope), None, loop_body_1, get_state_1, set_state_1, ('val_acc_object', 'val_losses'), {'iterate_names': 'epoch'})

The error occurs at the epoch loop but says it is about SparseCategoricalCrossentropy, which is what I don’t understand. Please help me with this.

gent.spah · May 21, 2023, 4:33pm

What would happen if you use range() instead if tf.range()?

Lakshmi_Narayana · May 21, 2023, 6:21pm

Converting to python range still gives the same error. I’m assuming the problem is that the objects of SparseCategoricalAccuracy metric are not being received by the graph. Is there a way to convert these to normal tensors or pass it in a way that a graph can accept it?

gent.spah · May 21, 2023, 6:25pm

Can you try categoricalcrossentropy instead, if the labels are one hot encoded thats what you need!

Lakshmi_Narayana · May 21, 2023, 8:55pm

I was able to run it without errors in eager mode, so I don’t think the problem is related to the loss object. It has to be with the graph mode.
If I try to return something like a metric object or a model from the decorated function, I get this error. Otherwise, I am able to run it in graph mode without including the loop and by returning only tensors from the decorated function.

Here is my program without including loops in graph mode:

import os

import tensorflow as tf
import tensorflow_datasets as tfds

import numpy as np

import time

start = time.time()
os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '1'
batch_size = tf.constant(64)
epochs = tf.constant(10)

train_data, info = tfds.load('fashion_mnist', split='train', with_info=True)
test_data = tfds.load('fashion_mnist', split='test')


def prepare_data(data):
    image = data['image']
    image = tf.reshape(image, (-1,))
    image = tf.cast(image, tf.float32)
    image = image / 255.0
    return image, data['label']


train_data = train_data.map(prepare_data)
train_data = train_data.shuffle(buffer_size=1024).batch(64).prefetch(1)

test_data = test_data.map(prepare_data)
test_data = test_data.shuffle(buffer_size=1024).batch(64).prefetch(1)


def basic_model():
    inputs = tf.keras.layers.Input(shape=(784,))
    x = tf.keras.layers.Dense(64, activation='relu')(inputs)
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    outputs = tf.keras.layers.Dense(10, activation='softmax')(x)

    return tf.keras.Model(inputs=inputs, outputs=outputs)


model = basic_model()

# loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
# optimizer = tf.keras.optimizers.Adam()
# train_acc_object = tf.keras.metrics.SparseCategoricalAccuracy()
# val_acc_object = tf.keras.metrics.SparseCategoricalAccuracy()
# train_losses = tf.TensorArray(dtype=tf.float32, size=batch_size, dynamic_size=True)
# val_losses = tf.TensorArray(dtype=tf.float32, size=batch_size, dynamic_size=True)
loss_value = tf.Variable(0)

@tf.function
def train_step( x, ytrue, model, loss_object, optimizer):
    print('train_step trace')
    with tf.GradientTape() as tape:
        ypred = model(x)
        loss_value = loss_object(ytrue, ypred)
    grads = tape.gradient(loss_value, model.variables)
    optimizer.apply_gradients(zip(grads, model.variables))
    return loss_value, ypred


@tf.function
def validation_step(x, ytrue, model, loss_object):
    print('validation_step trace')
    ypred = model(x)
    loss_value = loss_object(ytrue, ypred)
    return loss_value, ypred


def train(model=model, train_data=train_data, test_data=test_data):
    print('train trace')
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    optimizer = tf.keras.optimizers.Adam()
    train_acc_object = tf.keras.metrics.SparseCategoricalAccuracy()
    val_acc_object = tf.keras.metrics.SparseCategoricalAccuracy()
    train_losses = tf.TensorArray(dtype=tf.float32, size=batch_size, dynamic_size=True)
    val_losses = tf.TensorArray(dtype=tf.float32, size=batch_size, dynamic_size=True)
    for epoch in tf.range(epochs, dtype=tf.int32):
        print('epoch ',epoch)
        for i, (x, ytrue) in train_data.enumerate():
            loss_value, ypred = train_step(x, ytrue, model, loss_object, optimizer)
        train_losses.write(i, loss_value).mark_used()
        train_acc_object.update_state(ytrue, ypred)

        for i, (x, ytrue) in test_data.enumerate():
            loss_value, ypred = validation_step(x, ytrue, model, loss_object)
        val_losses.write(i, loss_value).mark_used()
        val_acc_object.update_state(ytrue, ypred)

        print('train_error: ', tf.math.reduce_mean(train_losses.stack()), ' train_acc: ',train_acc_object.result(),
              ' val_error: ', tf.math.reduce_mean(val_losses.stack()), ' val_acc: ',val_acc_object.result())


try:
    train()
except TypeError as e:
    print(tf.autograph.to_code(train_step.python_function))
    print(tf.autograph.to_code(validation_step.python_function))
    print(e)

print('time: ',time.time() - start)

Here is my output in graph mode without including loops:

train trace
epoch  tf.Tensor(0, shape=(), dtype=int32)
train_step trace
train_step trace
train_step trace
validation_step trace
validation_step trace
train_error:  tf.Tensor(0.00022745856, shape=(), dtype=float32)  train_acc:  tf.Tensor(0.96875, shape=(), dtype=float32)  val_error:  tf.Tensor(0.0037840025, shape=(), dtype=float32)  val_acc:  tf.Tensor(0.8125, shape=(), dtype=float32)
epoch  tf.Tensor(1, shape=(), dtype=int32)
train_error:  tf.Tensor(0.00012775861, shape=(), dtype=float32)  train_acc:  tf.Tensor(0.953125, shape=(), dtype=float32)  val_error:  tf.Tensor(0.00090734364, shape=(), dtype=float32)  val_acc:  tf.Tensor(0.875, shape=(), dtype=float32)
epoch  tf.Tensor(2, shape=(), dtype=int32)
train_error:  tf.Tensor(0.00030771855, shape=(), dtype=float32)  train_acc:  tf.Tensor(0.9270833, shape=(), dtype=float32)  val_error:  tf.Tensor(0.0017645856, shape=(), dtype=float32)  val_acc:  tf.Tensor(0.8541667, shape=(), dtype=float32)
epoch  tf.Tensor(3, shape=(), dtype=int32)
train_error:  tf.Tensor(7.789134e-05, shape=(), dtype=float32)  train_acc:  tf.Tensor(0.9453125, shape=(), dtype=float32)  val_error:  tf.Tensor(0.0062182425, shape=(), dtype=float32)  val_acc:  tf.Tensor(0.8125, shape=(), dtype=float32)
epoch  tf.Tensor(4, shape=(), dtype=int32)
train_error:  tf.Tensor(0.00033706325, shape=(), dtype=float32)  train_acc:  tf.Tensor(0.9375, shape=(), dtype=float32)  val_error:  tf.Tensor(0.0020649936, shape=(), dtype=float32)  val_acc:  tf.Tensor(0.825, shape=(), dtype=float32)
epoch  tf.Tensor(5, shape=(), dtype=int32)
train_error:  tf.Tensor(0.00033246292, shape=(), dtype=float32)  train_acc:  tf.Tensor(0.9270833, shape=(), dtype=float32)  val_error:  tf.Tensor(0.0013289611, shape=(), dtype=float32)  val_acc:  tf.Tensor(0.8333333, shape=(), dtype=float32)
epoch  tf.Tensor(6, shape=(), dtype=int32)
train_error:  tf.Tensor(0.0001625357, shape=(), dtype=float32)  train_acc:  tf.Tensor(0.92410713, shape=(), dtype=float32)  val_error:  tf.Tensor(0.0009809914, shape=(), dtype=float32)  val_acc:  tf.Tensor(0.84821427, shape=(), dtype=float32)
epoch  tf.Tensor(7, shape=(), dtype=int32)
train_error:  tf.Tensor(0.00033120092, shape=(), dtype=float32)  train_acc:  tf.Tensor(0.91796875, shape=(), dtype=float32)  val_error:  tf.Tensor(0.0024140156, shape=(), dtype=float32)  val_acc:  tf.Tensor(0.8515625, shape=(), dtype=float32)
epoch  tf.Tensor(8, shape=(), dtype=int32)
train_error:  tf.Tensor(0.00021466451, shape=(), dtype=float32)  train_acc:  tf.Tensor(0.9131944, shape=(), dtype=float32)  val_error:  tf.Tensor(0.002004218, shape=(), dtype=float32)  val_acc:  tf.Tensor(0.8472222, shape=(), dtype=float32)
epoch  tf.Tensor(9, shape=(), dtype=int32)
train_error:  tf.Tensor(0.00012499285, shape=(), dtype=float32)  train_acc:  tf.Tensor(0.915625, shape=(), dtype=float32)  val_error:  tf.Tensor(0.00043017257, shape=(), dtype=float32)  val_acc:  tf.Tensor(0.8625, shape=(), dtype=float32)
time:  27.27366614341736

Process finished with exit code 0

gent.spah · May 22, 2023, 7:20am

I do not have any other thoughts for it to be frank with you!

Topic		Replies	Views
Need Help with a TypeError in Tft Preprocess_fn Machine Learning Data Lifecycle in Production	1	578	August 6, 2021
How do you avoid the "Cast string to float is not supported" error when training a model? Sequences, Time Series and Prediction week-4	1	890	March 21, 2022
Error in C2W2_Assignment for Feature Engineering Machine Learning Engineering for Production(MLOps)	5	324	August 25, 2021
W02 Assignment TypeError: x and y must have the same dtype, got tf.uint8 != tf.float32 Advanced Deployment Scenarios with TensorFlow week-2	2	679	September 24, 2021
Exercise 3 - compute_layer_style_cost: Don't use the numpy API inside compute_layer_style_cost Convolutional Neural Networks	6	967	March 8, 2022

Stuck with an error in graph mode

Related topics