import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_datasets as tfds

IMG_SIZE = 160
BATCH_SIZE = 32
SHUFFLE_SIZE = 1000
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)
# Data loader
# This class is responsible for loading the data and preparing it for later processing.
class DataLoader(object):
    def __init__(self, image_size, batch_size):
        self.image_size = image_size
        self.batch_size = batch_size

        # 80% train data, 10% validation data, 10% test data
        (self.train_data_raw, self.validation_data_raw, self.test_data_raw), self.metadata = tfds.load(
            'cats_vs_dogs',
            split=['train[:80%]', 'train[80%:90%]', 'train[90%:]'],
            with_info=True,
            as_supervised=True,
        )
        # Get the number of train examples
        self.num_train_examples = self.metadata.splits['train'].num_examples * 80 / 100
        self.get_label_name = self.metadata.features['label'].int2str

        # Pre-process data
        self._prepare_data()
        self._prepare_batches()
    # Resize all images to image_size x image_size
    # Internal method that rescales and normalizes the images in the dataset; the constructor relies on it.
    def _prepare_data(self):
        self.train_data = self.train_data_raw.map(self._resize_sample)
        self.validation_data = self.validation_data_raw.map(self._resize_sample)
        self.test_data = self.test_data_raw.map(self._resize_sample)

    # Resize one image to image_size x image_size
    # Internal method that rescales a single image and normalizes its pixels to [-1, 1].
    def _resize_sample(self, image, label):
        image = tf.cast(image, tf.float32)
        image = (image / 127.5) - 1
        image = tf.image.resize(image, (self.image_size, self.image_size))
        return image, label

    # Internal method that groups the images into batches. It creates train_batches,
    # validation_batches and test_batches, used for training and evaluation respectively.
    def _prepare_batches(self):
        self.train_batches = self.train_data.shuffle(SHUFFLE_SIZE).batch(self.batch_size)
        self.validation_batches = self.validation_data.batch(self.batch_size)
        self.test_batches = self.test_data.batch(self.batch_size)

    # Get defined number of not processed images
    # This method fetches a fixed number of images at random from the raw, unprocessed data.
    def get_random_raw_images(self, num_of_images):
        random_train_raw_data = self.train_data_raw.shuffle(SHUFFLE_SIZE)
        return random_train_raw_data.take(num_of_images)
data_loader = DataLoader(IMG_SIZE, BATCH_SIZE)

plt.figure(figsize=(10, 8))
i = 0
for img, label in data_loader.get_random_raw_images(20):
    plt.subplot(4, 5, i + 1)
    plt.imshow(img)
    plt.title("{} - {}".format(data_loader.get_label_name(label), img.shape))
    plt.xticks([])
    plt.yticks([])
    i += 1
plt.tight_layout()
plt.show()
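# An optional tf.data refinement that is not part of the original pipeline:
# prefetch overlaps batch preparation with training. A minimal sketch, reusing
# the already-mapped data_loader.train_data:
train_batches_prefetched = (data_loader.train_data
                            .shuffle(SHUFFLE_SIZE)
                            .batch(BATCH_SIZE)
                            .prefetch(tf.data.experimental.AUTOTUNE))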
# Load the pre-trained models, which live in tensorflow.keras.applications
vgg16_base = tf.keras.applications.VGG16(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')
vgg16_base.summary()
#googlenet_base = tf.keras.applications.InceptionV3(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')
#resnet_base = tf.keras.applications.ResNet101V2(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')
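# A quick sanity check (an addition, not from the original post): with
# include_top=False and a 160x160x3 input, VGG16 ends in a 5x5x512 feature map,
# which the pooling layer added below collapses to a 512-vector.
feature_batch = vgg16_base(tf.zeros((1, IMG_SIZE, IMG_SIZE, 3)))
print(feature_batch.shape)  # expected: (1, 5, 5, 512)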
# Layers placed on top of the pre-trained base
class Wrapper(tf.keras.Model):
    def __init__(self, base_model):
        super(Wrapper, self).__init__()
        self.base_model = base_model
        self.average_pooling_layer = tf.keras.layers.GlobalAveragePooling2D()
        self.output_layer = tf.keras.layers.Dense(1)

    def call(self, inputs):
        x = self.base_model(inputs)
        x = self.average_pooling_layer(x)
        output = self.output_layer(x)
        return output
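# Because Dense(1) has no activation, the wrapper returns raw logits rather
# than probabilities. A small illustrative helper (the name predict_labels is
# an assumption, not from the original) for reading them:
def predict_labels(model, image_batch):
    logits = model(image_batch)             # shape: (batch, 1)
    probabilities = tf.nn.sigmoid(logits)   # probability of label 1 ('dog')
    labels = tf.cast(logits > 0, tf.int32)  # 1 if probability > 0.5, else 0
    return labels, probabilities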
base_learning_rate = 0.0001
vgg16_base.trainable = False
vgg16 = Wrapper(vgg16_base)
# The Dense(1) head outputs logits, so the loss needs from_logits=True.
vgg16.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])
# googlenet_base.trainable = False
# googlenet = Wrapper(googlenet_base)
# googlenet.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate),
#                   loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
#                   metrics=['accuracy'])
# resnet_base.trainable = False
# resnet = Wrapper(resnet_base)
# resnet.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate),
#                loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
#                metrics=['accuracy'])

steps_per_epoch = round(data_loader.num_train_examples) // BATCH_SIZE
validation_steps = 20
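# For concreteness (the 23,262-image total is what tfds reported for
# cats_vs_dogs at the time of writing; treat it as an assumption): 80% of
# 23,262 is ~18,610 training images, so steps_per_epoch works out to
# 18610 // 32 = 581 optimizer steps per epoch.
print(data_loader.num_train_examples)  # ~18609.6
print(steps_per_epoch)                 # ~581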
loss1, accuracy1 = vgg16.evaluate(data_loader.validation_batches, steps=20)
#loss2, accuracy2 = googlenet.evaluate(data_loader.validation_batches, steps=20)
#loss3, accuracy3 = resnet.evaluate(data_loader.validation_batches, steps=20)

print("--------VGG16---------")
print("Initial loss: {:.2f}".format(loss1))
print("Initial accuracy: {:.2f}".format(accuracy1))
print("---------------------------")
history = vgg16.fit(data_loader.train_batches,
                    epochs=10,
                    validation_data=data_loader.validation_batches)

loss1, accuracy1 = vgg16.evaluate(data_loader.test_batches, steps=20)

print("--------VGG16---------")
print("Loss: {:.2f}".format(loss1))
print("Accuracy: {:.2f}".format(accuracy1))
print("---------------------------")
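# The History object returned by fit() records per-epoch metrics; a minimal
# sketch for visualizing them (the key names follow from metrics=['accuracy']):
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(8, 4))
plt.subplot(1, 2, 1)
plt.plot(acc, label='train accuracy')
plt.plot(val_acc, label='validation accuracy')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(loss, label='train loss')
plt.plot(val_loss, label='validation loss')
plt.legend()
plt.show()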