跳至主要内容

訓練後量化_TensorFlow_Lite_Quantization_鐵人賽示範

Open In Colab

20.TensorFlow Lite Quantization

# Dictionaries that collect the evaluation results of each model variant,
# keyed by a descriptive variant name.
MODEL_SIZE = {}  # variant name -> size of the saved model file (os.path.getsize)
ACCURACY = {}  # variant name -> accuracy measured on the test set
import tensorflow as tf
import numpy as np
import os

建立基本模型

  • 資料集採用 tf.keras.datasets.mnist,並以 CNN 進行建模。
# Fetch MNIST; load_data() yields a (train, test) pair of (images, labels).
mnist = tf.keras.datasets.mnist
train_split, test_split = mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

# Rescale the images by 1/255 so pixel values lie between 0 and 1
# (assumes the raw pixels are in the 0-255 range).
train_images, test_images = train_images / 255.0, test_images / 255.0
def model_builder():
    """Build the (uncompiled) CNN used by every experiment in this file.

    The stack takes 28x28 inputs, reshapes them to 28x28x1, applies a
    12-filter 3x3 ReLU convolution, 2x2 max pooling, flattening and a
    final 10-unit softmax layer.

    Returns:
        A new ``tf.keras.Sequential`` model; compiling it is left to the
        caller.
    """
    layers = tf.keras.layers

    layer_stack = [
        layers.InputLayer(input_shape=(28, 28)),
        # Conv2D needs an explicit channel axis.
        layers.Reshape(target_shape=(28, 28, 1)),
        layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dense(10, activation='softmax'),
    ]

    return tf.keras.Sequential(layer_stack)
# Build and compile the baseline (float) model.
baseline_model = model_builder()
baseline_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

baseline_model.summary()

# Save the weights BEFORE training: 'baseline_weights.h5' is loaded again
# further down in this file as the starting point for the
# quantization-aware model.
baseline_model.save_weights('baseline_weights.h5')

baseline_model.fit(train_images, train_labels, epochs=1, shuffle=False)

# Save the trained, un-quantized model (optimizer state excluded).
baseline_model.save('non_quantized.h5', include_optimizer=False)

# Evaluate on the test set and record the accuracy (second returned value).
_, baseline_accuracy = baseline_model.evaluate(test_images, test_labels)
ACCURACY['baseline Keras model'] = baseline_accuracy

# Record the size of the saved model file.
baseline_h5_size = os.path.getsize('non_quantized.h5')
MODEL_SIZE['baseline h5'] = baseline_h5_size

# Bare expressions: displayed as cell output when run in a notebook.
ACCURACY
MODEL_SIZE

轉為 TF Lite 格式

  • 轉為 TF Lite 使用的 *.tflite格式。
# Convert the trained Keras model to TF Lite without setting any
# optimization on the converter.
converter = tf.lite.TFLiteConverter.from_keras_model(baseline_model)
tflite_model = converter.convert()

# Write the converted model to disk in binary mode.
with open('non_quantized.tflite', mode='wb') as tflite_file:
    tflite_file.write(tflite_model)
  • 建立評估 TF Lite 模型準確率的函數:模型轉為 tflite 格式後須透過 tf.lite.Interpreter 執行推論,因此需要另外撰寫評估函數(參考官方範例)。
# A helper function to evaluate the TF Lite model using "test" dataset.
# from: https://www.tensorflow.org/lite/performance/post_training_integer_quant_16x8#evaluate_the_models
def evaluate_model(filemane):
    """Return the accuracy of a TF Lite model on the test dataset.

    Every image of the module-level ``test_images`` is fed through a
    ``tf.lite.Interpreter`` one at a time, and the argmax of each output
    is compared with the matching entry of the module-level
    ``test_labels``.

    Args:
        filemane: Path of the ``.tflite`` model file to load. The
            misspelled name is kept on purpose because existing callers
            pass it by keyword.

    Returns:
        The fraction of test images classified correctly, as a float.

    Raises:
        ZeroDivisionError: If ``test_images`` is empty.
    """
    # Load the model into the interpreter.
    interpreter = tf.lite.Interpreter(model_path=str(filemane))
    interpreter.allocate_tensors()

    input_index = interpreter.get_input_details()[0]["index"]
    output_index = interpreter.get_output_details()[0]["index"]

    # Run predictions on every image in the "test" dataset.
    prediction_digits = []
    for test_image in test_images:
        # Pre-processing: add batch dimension and convert to float32 to
        # match the model's input data format.
        test_image = np.expand_dims(test_image, axis=0).astype(np.float32)
        interpreter.set_tensor(input_index, test_image)

        # Run inference.
        interpreter.invoke()

        # Post-processing: remove batch dimension and find the digit with
        # the highest probability.
        output = interpreter.tensor(output_index)
        prediction_digits.append(np.argmax(output()[0]))

    # Compare predictions with the ground-truth labels pairwise instead of
    # indexing both sequences by position.
    accurate_count = sum(
        1
        for predicted, expected in zip(prediction_digits, test_labels)
        if predicted == expected
    )

    return accurate_count / len(prediction_digits)
  • 準確率略有提升,模型大小略降
# Accuracy of the un-quantized TF Lite model on the test set.
non_quantized_accuracy = evaluate_model(filemane='non_quantized.tflite')
ACCURACY['non quantized tflite'] = non_quantized_accuracy
ACCURACY  # displayed as cell output when run in a notebook

# Size of the un-quantized TF Lite file.
non_quantized_size = os.path.getsize('non_quantized.tflite')
MODEL_SIZE['non quantized tflite'] = non_quantized_size
MODEL_SIZE  # displayed as cell output when run in a notebook

訓練後量化 Post-Training Quantization

  • 本範例示範訓練後量化之動態範圍量化 Dynamic range quantization 。
  • 您也可以嘗試全整數(int8)量化或 float16 量化。
# Dynamic range quantization: same conversion as before, but with the
# converter's default optimization enabled.
converter = tf.lite.TFLiteConverter.from_keras_model(baseline_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Write the quantized model to disk in binary mode.
with open('post_training_quantized.tflite', mode='wb') as quantized_file:
    quantized_file.write(tflite_model)
  • 模型大小下降許多,準確率略有提升
# Accuracy of the post-training-quantized TF Lite model on the test set.
post_training_accuracy = evaluate_model(filemane='post_training_quantized.tflite')
ACCURACY['post training quantized tflite'] = post_training_accuracy
ACCURACY  # displayed as cell output when run in a notebook

# Size of the post-training-quantized TF Lite file.
post_training_size = os.path.getsize('post_training_quantized.tflite')
MODEL_SIZE['post training quantized tflite'] = post_training_size
MODEL_SIZE  # displayed as cell output when run in a notebook

(選用)量化感知訓練 Quantization Aware Training

  • 當訓練後量化導致您的準確率下降多到無法接受,可以考慮在量化模型之前進行量化感知訓練 Quantization Aware Training
  • 此方法為在訓練期間在模型中插入假量化節點來模擬精度損失,讓模型學會適應精度損失,以獲得更準確的預測。
  • 需使用 tensorflow_model_optimization 模組,該模組提供 quantize_model() 完成任務。
  • 調整後再量化可舒緩準確率下降的問題。
!pip install tensorflow_model_optimization
  • 使用先前在訓練前儲存的初始權重 'baseline_weights.h5' 重新初始化模型,再進行量化感知訓練。
  • 經 quantize_model() 處理後,模型會多出一些假量化節點與 Layer。
import tensorflow_model_optimization as tfmot

# Shorthand for the tfmot function that quantizes a Keras model.
quantize_model = tfmot.quantization.keras.quantize_model

# Rebuild the same architecture and restore the weights from
# 'baseline_weights.h5', the file written earlier in this file before the
# baseline model was trained.
model_to_quantize = model_builder()
model_to_quantize.load_weights('baseline_weights.h5')

# Produce the quantization-aware version of the model.
q_aware_model = quantize_model(model_to_quantize)

# `quantize_model` requires a recompile.
qat_compile_options = dict(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
q_aware_model.compile(**qat_compile_options)

q_aware_model.summary()

# NOTE: this save happens before the quantization-aware model is trained.
q_aware_model.save('quantization_aware_non-quantized.h5', include_optimizer=False)
  • 訓練經過感知訓練的模型,您可以自行調整 epochs。
# Train the quantization-aware model.
q_aware_model.fit(train_images, train_labels, epochs=10, shuffle=False)

# Record its test-set accuracy (second value returned by evaluate()).
_, q_aware_accuracy = q_aware_model.evaluate(test_images, test_labels, verbose=0)
ACCURACY['quantization aware non-quantized'] = q_aware_accuracy
ACCURACY  # displayed as cell output when run in a notebook

# NOTE: the .h5 measured here was written by the save() call earlier in
# this file, i.e. before the fit() above.
q_aware_h5_size = os.path.getsize('quantization_aware_non-quantized.h5')
MODEL_SIZE['quantization aware non-quantized'] = q_aware_h5_size
MODEL_SIZE  # displayed as cell output when run in a notebook