訓練後量化_TensorFlow_Lite_Quantization_鐵人賽示範
20. TensorFlow Lite Quantization
- 此為鐵人賽系列文示範文件,參考TensorFlow Lite官方範例修改而成。
- TF Lite 評估函數參考來源。
# Registries that collect the evaluation results of every model variant,
# keyed by a human-readable label: MODEL_SIZE holds saved-file sizes and
# ACCURACY holds accuracy scores.
MODEL_SIZE, ACCURACY = {}, {}
import tensorflow as tf
import numpy as np
import os
建立基本模型
- 模型採用 `tf.keras.datasets.mnist` 資料集,用 CNN 進行建模。
# Fetch the MNIST train/test splits through the Keras dataset helper.
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Divide both image sets by 255.0 so that each pixel value lies between
# 0 and 1; the labels are left untouched.
train_images, test_images = train_images / 255.0, test_images / 255.0
def model_builder():
    """Build the (uncompiled) Keras CNN used throughout this example.

    The network accepts 28x28 inputs, reshapes them to 28x28x1, then applies
    a 3x3 convolution with 12 filters (ReLU), 2x2 max pooling, a flatten
    step and a 10-unit softmax dense layer.

    Returns:
        A new ``tf.keras.Sequential`` model. It is not compiled here; the
        caller is expected to call ``compile`` on it.
    """
    layers = tf.keras.layers
    # Layers are instantiated in the order they are applied.
    layer_stack = [
        layers.InputLayer(input_shape=(28, 28)),
        layers.Reshape(target_shape=(28, 28, 1)),
        layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dense(10, activation='softmax'),
    ]
    return tf.keras.Sequential(layer_stack)
# Build the baseline CNN and compile it with the Adam optimizer, sparse
# categorical cross-entropy loss and an accuracy metric.
baseline_model = model_builder()
baseline_model.compile(
optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy']
)
baseline_model.summary()
# NOTE: the weights are written to disk BEFORE fit() is called, so
# 'baseline_weights.h5' holds the freshly initialised (untrained) weights.
baseline_model.save_weights('baseline_weights.h5')
# Train for a single epoch with shuffling disabled.
baseline_model.fit(train_images, train_labels, epochs=1, shuffle=False)
# Save the non-quantized model (optimizer state excluded)
baseline_model.save('non_quantized.h5', include_optimizer=False)
# Evaluate the model and record its accuracy; the first value returned by
# evaluate() is discarded.
_, ACCURACY['baseline Keras model'] = baseline_model.evaluate(test_images, test_labels)
# Record the model size as reported by os.path.getsize
MODEL_SIZE['baseline h5'] = os.path.getsize('non_quantized.h5')
# Bare expressions: presumably shown as cell output when run in a notebook.
ACCURACY
MODEL_SIZE
轉為 TF Lite 格式
- 轉為 TF Lite 使用的 `*.tflite` 格式。
# Convert the baseline Keras model to TF Lite; no optimizations are set on
# the converter, so this is the non-quantized conversion.
converter = tf.lite.TFLiteConverter.from_keras_model(baseline_model)
tflite_model = converter.convert()

# Write the converted model to disk in binary mode.
with open('non_quantized.tflite', mode='wb') as tflite_file:
    tflite_file.write(tflite_model)
- 建立TF Lite 的評估模型準確率的函數,轉檔為tflite後需要特別撰寫,參考官方範例。
# A helper function to evaluate the TF Lite model using "test" dataset.
# from: https://www.tensorflow.org/lite/performance/post_training_integer_quant_16x8#evaluate_the_models
def evaluate_model(filemane):
    """Return the accuracy of a TF Lite model file on the test dataset.

    Reads the module-level ``test_images`` and ``test_labels``.

    Args:
        filemane: Path of the ``.tflite`` file to load. The spelling is kept
            as-is because callers pass this argument by keyword.

    Returns:
        The fraction of ``test_images`` whose arg-max prediction equals the
        entry at the same position in ``test_labels``, as a float.
    """
    # Load the model into an interpreter and allocate its tensors.
    tflite_interpreter = tf.lite.Interpreter(model_path=str(filemane))
    tflite_interpreter.allocate_tensors()
    input_slot = tflite_interpreter.get_input_details()[0]["index"]
    output_slot = tflite_interpreter.get_output_details()[0]["index"]

    # Run one inference per test image and keep the predicted digit.
    predicted = []
    for image in test_images:
        # Add a batch dimension and cast to float32 before feeding the model.
        batch = np.expand_dims(image, axis=0).astype(np.float32)
        tflite_interpreter.set_tensor(input_slot, batch)
        tflite_interpreter.invoke()
        # Drop the batch dimension and pick the highest-scoring class.
        scores = tflite_interpreter.tensor(output_slot)
        predicted.append(np.argmax(scores()[0]))

    # Count the predictions that agree with the ground-truth labels.
    hits = sum(
        1
        for position, digit in enumerate(predicted)
        if digit == test_labels[position]
    )
    return hits * 1.0 / len(predicted)
- 精確值略有提升,模型大小略降
# Evaluate the non-quantized .tflite file with evaluate_model and record
# the resulting accuracy.
ACCURACY['non quantized tflite'] = evaluate_model(filemane='non_quantized.tflite')
# Bare expression: presumably shown as cell output when run in a notebook.
ACCURACY
# Record the size of the .tflite file as reported by os.path.getsize.
MODEL_SIZE['non quantized tflite'] = os.path.getsize('non_quantized.tflite')
MODEL_SIZE
訓練後量化 Post-Training Quantization
- 本範例示範訓練後量化之動態範圍量化 Dynamic range quantization 。
- 您也可以嘗試全整數(int8)量化或 float16 量化。
# Dynamic range quantization: convert the baseline Keras model with the
# converter's default optimization enabled.
converter = tf.lite.TFLiteConverter.from_keras_model(baseline_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Write the quantized model to disk in binary mode.
with open('post_training_quantized.tflite', mode='wb') as quantized_file:
    quantized_file.write(tflite_model)
- 模型大小下降許多,精準度略有提升
# Evaluate the post-training-quantized .tflite file with evaluate_model and
# record the resulting accuracy.
ACCURACY['post training quantized tflite'] = evaluate_model(filemane='post_training_quantized.tflite')
# Bare expression: presumably shown as cell output when run in a notebook.
ACCURACY
# Record the size of the quantized .tflite file as reported by
# os.path.getsize.
MODEL_SIZE['post training quantized tflite'] = os.path.getsize('post_training_quantized.tflite')
MODEL_SIZE
(選用)量化感知訓練 Quantization Aware Training
- 當訓練後量化導致您的準確率下降多到無法接受,可以考慮在量化模型之前進行量化感知訓練 Quantization Aware Training。
- 此方法為在訓練期間在模型中插入假量化節點來模擬精度損失,讓模型學會適應精度損失,以獲得更準確的預測。
- 需使用 `tensorflow_model_optimization` 模組,該模組提供 `quantize_model()` 完成任務。
- 調整後再量化可舒緩準確率下降的問題。
!pip install tensorflow_model_optimization
- 使用先前儲存的 'baseline_weights.h5' 模型權重(於 baseline 模型訓練前儲存的初始權重)進行優化。
- 模型增加了些假節點與 Layer。
import tensorflow_model_optimization as tfmot
# Alias for the tfmot function that quantizes a Keras model
quantize_model = tfmot.quantization.keras.quantize_model
# Define the model architecture (a fresh, uncompiled model from model_builder).
model_to_quantize = model_builder()
# Reinitialize weights with the saved file 'baseline_weights.h5'
model_to_quantize.load_weights('baseline_weights.h5')
# Quantize the model
q_aware_model = quantize_model(model_to_quantize)
# `quantize_model` requires a recompile.
q_aware_model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
q_aware_model.summary()
# Save the quantization-aware model without optimizer state. No fit() call
# has been made on q_aware_model at this point.
q_aware_model.save('quantization_aware_non-quantized.h5', include_optimizer=False)
- 訓練經過感知訓練的模型,您可以自行調整 epochs。
# Train the quantization-aware model for 10 epochs with shuffling disabled.
q_aware_model.fit(train_images, train_labels, epochs=10, shuffle=False)
# Evaluate on the test set and record the accuracy; the first value returned
# by evaluate() is discarded.
_, ACCURACY['quantization aware non-quantized'] = q_aware_model.evaluate(test_images, test_labels, verbose=0)
# Bare expression: presumably shown as cell output when run in a notebook.
ACCURACY
# Record the size of the previously saved .h5 file as reported by
# os.path.getsize.
MODEL_SIZE['quantization aware non-quantized'] = os.path.getsize('quantization_aware_non-quantized.h5')
MODEL_SIZE