Classify Images using MobileNet and TensorFlow Lite

MobileNet is a convolutional neural network (CNN) designed for mobile and embedded devices and is often used for image classification. This tutorial demonstrates how to classify images using MobileNet and TensorFlow Lite.

Prepare environment

Download pre-trained MobileNet model:

curl -Lo mobilenet_v1_1.0_224.tflite https://tfhub.dev/tensorflow/lite-model/mobilenet_v1_1.0_224/1/default/1?lite-format=tflite

The model was trained on the ImageNet dataset and predicts 1001 classes: class 0 is "background", followed by the 1000 actual ImageNet classes. The expected input image size is 224x224.
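
These details can be verified programmatically. Here is a minimal sketch using the tflite_runtime package (installed separately, e.g. via pip) that prints the input and output tensor shapes of the downloaded model:

from tflite_runtime.interpreter import Interpreter

interpreter = Interpreter('mobilenet_v1_1.0_224.tflite')
interpreter.allocate_tensors()

# Expected shapes: input [1 224 224 3], output [1 1001]
print(interpreter.get_input_details()[0]['shape'])
print(interpreter.get_output_details()[0]['shape'])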

Download ImageNet class labels:

curl -o labels.txt https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt

Download image for testing:

curl -o test.bmp https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/lite/examples/label_image/testdata/grace_hopper.bmp

Inference

Class labels are read from a file into an array. The TensorFlow Lite model is loaded into memory. During preprocessing, the image is resized to 224x224 and its pixel values are normalized to the range [-1, 1]. To classify the image, the model is invoked. It returns a list of class probabilities, and the largest probability is mapped to the corresponding label.
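
As a quick sanity check of the scaling used below, the expression (1 / 127.5) * x - 1 maps pixel value 0 to -1, 127.5 to 0, and 255 to 1:

import numpy as np

px = np.array([0.0, 127.5, 255.0])
print((1 / 127.5) * px - 1)  # [-1.  0.  1.]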

main.py

from tflite_runtime.interpreter import Interpreter
import numpy as np
import cv2


def readLabels(labelsFile):
    with open(labelsFile, 'r') as file:
        labels = [line.strip() for line in file.readlines()]

    return labels


def preprocessImage(img):
    # Resize to the model's input size and convert OpenCV's BGR order to RGB
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Add a batch dimension: (224, 224, 3) -> (1, 224, 224, 3)
    img = np.expand_dims(img, axis=0)

    # Scale pixel values from [0, 255] to [-1, 1]
    return np.float32((1 / 127.5) * img - 1)


def main():
    LABELS_FILE = 'labels.txt'
    IMG_FILE = 'test.bmp'
    MODEL_FILE = 'mobilenet_v1_1.0_224.tflite'
    
    labels = readLabels(LABELS_FILE)

    img = cv2.imread(IMG_FILE)
    img = preprocessImage(img)

    interpreter = Interpreter(MODEL_FILE)
    interpreter.allocate_tensors()

    inputDetails = interpreter.get_input_details()
    interpreter.set_tensor(inputDetails[0]['index'], img)
    interpreter.invoke()

    outputDetails = interpreter.get_output_details()
    outputData = interpreter.get_tensor(outputDetails[0]['index'])
    outputData = np.squeeze(outputData)

    idx = np.argmax(outputData)
    label = labels[idx]
    output = outputData[idx]

    print('%.6f: %s (%s)' % (output, label, idx))


main()
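
The script prints only the best match. To list more candidates, the probabilities can be sorted in descending order; here is a small sketch, assuming outputData and labels are computed as in main.py above:

import numpy as np


def printTopK(outputData, labels, k=5):
    # Indices of the k largest probabilities, in descending order
    topIdx = np.argsort(outputData)[::-1][:k]
    for i in topIdx:
        print('%.6f: %s (%d)' % (outputData[i], labels[i], i))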

main.cpp (C++ API)

#include <iostream>
#include <fstream>
#include <algorithm>
#include <cstring>
#include <opencv2/opencv.hpp>
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>
#include <tensorflow/lite/model.h>

using namespace cv;
using namespace tflite;

void readLabels(const char *labelsFile, std::vector<std::string> &labels)
{
    std::string line;
    std::ifstream fin(labelsFile);
    while (getline(fin, line)) {
        labels.push_back(line);
    }
}

void preprocessImage(Mat &img)
{
    // Resize to the model's input size and convert OpenCV's BGR order to RGB
    resize(img, img, Size(224, 224), 0, 0, INTER_AREA);
    cvtColor(img, img, COLOR_BGR2RGB);
    // Scale pixel values from [0, 255] to [-1, 1] as 32-bit floats
    img.convertTo(img, CV_32FC3, 1 / 127.5f, -1);
}

int main()
{
    const int NUM_CLASSES = 1001;
    const char *LABELS_FILE = "labels.txt";
    const char *IMG_FILE = "test.bmp";
    const char *MODEL_FILE = "mobilenet_v1_1.0_224.tflite";

    std::vector<std::string> labels;
    readLabels(LABELS_FILE, labels);

    Mat img = imread(IMG_FILE);
    preprocessImage(img);

    std::unique_ptr<FlatBufferModel> model = FlatBufferModel::BuildFromFile(MODEL_FILE);
    ops::builtin::BuiltinOpResolver resolver;
    std::unique_ptr<Interpreter> interpreter;
    InterpreterBuilder(*model, resolver)(&interpreter);
    interpreter->AllocateTensors();

    auto *inputTensor = interpreter->typed_input_tensor<float>(0);
    memcpy(inputTensor, img.data, img.total() * img.elemSize());
    interpreter->Invoke();

    auto *outputData = interpreter->typed_output_tensor<float>(0);

    float *output = std::max_element(outputData, outputData + NUM_CLASSES);
    long idx = output - outputData;
    std::string label = labels[idx];

    std::cout << *output << ": " << label << " (" << idx << ")" << std::endl;

    return 0;
}

main.cpp (C API)

#include <iostream>
#include <fstream>
#include <algorithm>
#include <opencv2/opencv.hpp>
#include <tensorflow/lite/c/common.h>
#include <tensorflow/lite/c/c_api.h>

using namespace cv;

void readLabels(const char *labelsFile, std::vector<std::string> &labels)
{
    std::string line;
    std::ifstream fin(labelsFile);
    while (getline(fin, line)) {
        labels.push_back(line);
    }
}

void preprocessImage(Mat &img)
{
    // Resize to the model's input size and convert OpenCV's BGR order to RGB
    resize(img, img, Size(224, 224), 0, 0, INTER_AREA);
    cvtColor(img, img, COLOR_BGR2RGB);
    // Scale pixel values from [0, 255] to [-1, 1] as 32-bit floats
    img.convertTo(img, CV_32FC3, 1 / 127.5f, -1);
}

int main()
{
    const int NUM_CLASSES = 1001;
    const char *LABELS_FILE = "labels.txt";
    const char *IMG_FILE = "test.bmp";
    const char *MODEL_FILE = "mobilenet_v1_1.0_224.tflite";

    std::vector<std::string> labels;
    readLabels(LABELS_FILE, labels);

    Mat img = imread(IMG_FILE);
    preprocessImage(img);

    TfLiteModel *model = TfLiteModelCreateFromFile(MODEL_FILE);
    TfLiteInterpreterOptions *options = TfLiteInterpreterOptionsCreate();
    TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options);
    TfLiteInterpreterAllocateTensors(interpreter);

    TfLiteTensor *inputTensor = TfLiteInterpreterGetInputTensor(interpreter, 0);
    TfLiteTensorCopyFromBuffer(inputTensor, img.data, img.total() * img.elemSize());
    TfLiteInterpreterInvoke(interpreter);

    float outputData[NUM_CLASSES];
    const TfLiteTensor *outputTensor = TfLiteInterpreterGetOutputTensor(interpreter, 0);
    TfLiteTensorCopyToBuffer(outputTensor, outputData, NUM_CLASSES * sizeof(float));

    float *output = std::max_element(outputData, outputData + NUM_CLASSES);
    long idx = output - outputData;
    std::string label = labels[idx];

    TfLiteInterpreterDelete(interpreter);
    TfLiteInterpreterOptionsDelete(options);
    TfLiteModelDelete(model);

    std::cout << *output << ": " << label << " (" << idx << ")" << std::endl;

    return 0;
}

The C++ code was built using CMake. CMake is not required; the libraries can also be linked manually when compiling. With the CMakeLists.txt files below, a typical build is cmake -B build followed by cmake --build build, assuming TensorFlow Lite and OpenCV are installed where the compiler and linker can find them.

CMakeLists.txt (C++ API)

cmake_minimum_required(VERSION 3.22)
project(app)

set(CMAKE_CXX_STANDARD 14)

add_executable(app main.cpp)

target_link_libraries(app tensorflow-lite opencv_core opencv_imgcodecs opencv_imgproc)

CMakeLists.txt (C API)

cmake_minimum_required(VERSION 3.22)
project(app)

set(CMAKE_CXX_STANDARD 14)

add_executable(app main.cpp)

target_link_libraries(app tensorflowlite_c opencv_core opencv_imgcodecs opencv_imgproc)

In our case, the following output is returned (from the Python script, the C++ API version, and the C API version, respectively):

0.912099: military uniform (653)
0.912099: military uniform (653)
0.918451: military uniform (653)

The image was classified correctly with a probability of about 91%.

We noticed that the C API version returns a slightly different probability than the Python and C++ API versions.
