我正在尝试在 TensorFlow 中为一个图像分类任务实现 t-SNE 可视化.我在网上找到的示例主要都是用 PyTorch 实现的,见此处.

以下是我用于训练的通用代码,运行效果非常好,我只想在其中加入 t-SNE 可视化:

import pandas as pd
import numpy as np
import tensorflow as tf
import cv2
from tensorflow import keras
from tensorflow.keras import layers, Input
from tensorflow.keras.layers import Dense, InputLayer, Flatten
from tensorflow.keras.models import Sequential, Model
from  matplotlib import pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

.
.
.

# Build the ResNet152 backbone with ImageNet weights and without its
# classification head (include_top=False); no pooling is applied to its output.
base_model=tf.keras.applications.ResNet152(
    include_top=False, weights='imagenet', input_tensor=None,
    input_shape=None, pooling=None)

.
.
.

# Freeze the backbone so only the layers added below are trained.
base_model.trainable = False


# Create new model on top.
# NOTE(review): Keras image inputs are conventionally (height, width, channels);
# confirm that (IMG_WIDTH, IMG_HEIGHT, 3) agrees with the target_size used by
# the data generators.
inputs = tf.keras.Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3))
# The backbone is called with training=False, i.e. in inference mode.
x = base_model(inputs, training=False)



# First fully connected block: flatten the backbone output, Dense(64), ReLU.
x=keras.layers.Flatten()(x)
x = keras.layers.Dense(64)(x)
x=layers.Activation('relu')(x)

# Second fully connected block: Dense(32), ReLU.
# NOTE(review): x is already the output of a Dense layer at this point, so this
# Flatten looks redundant - confirm before removing, since it changes the
# layer indices of the model.
x=keras.layers.Flatten()(x)
x = keras.layers.Dense(32)(x)
x=layers.Activation('relu')(x)

# Output block: two units followed by a softmax activation.
x = keras.layers.Dense(2)(x)
outputs=layers.Activation('softmax')(x)

# Functional model mapping the image input to the softmax output.
model=keras.Model(inputs, outputs)



# Generator configuration used for the validation data: random rotations,
# zoom and horizontal/vertical flips.
# NOTE(review): random augmentation is normally applied to training data only;
# confirm that augmenting (and shuffling) the validation set is intended.
# NOTE(review): the name is spelled "vaidation_datagen"; it is used below.
vaidation_datagen = ImageDataGenerator(rotation_range=90,
                                     zoom_range=0.2,
                                     horizontal_flip=True, 
                                     vertical_flip=True)


# Training batches read from train_path. `train_datagen` is defined outside
# this snippet.
train_generator = train_datagen.flow_from_directory(
        train_path,  # directory the training images are read from
        target_size=target_size,  # all images will be resized to the target size
        color_mode='rgb',
        batch_size=batch_size,
        shuffle=True,
        class_mode='categorical',
        interpolation='nearest',
        seed=42)  # fixed seed; class_mode='categorical' yields one-hot labels, not binary ones


# Validation batches read from validation_path with the same settings.
validation_generator = vaidation_datagen.flow_from_directory(
        validation_path,  # directory the validation images are read from
        target_size=target_size,  # all images will be resized to the target size
        color_mode='rgb',
        batch_size=batch_size,
        shuffle=True,
        class_mode='categorical',
        interpolation='nearest',
        seed=42)


# Compile with explicit keyword arguments. The positional order of compile()
# is not the same in every Keras release (Keras 3 places `loss_weights` before
# `metrics`), so passing `metrics` positionally can silently bind it to the
# wrong parameter.
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Save the model to model_path+model_filename whenever val_loss improves.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint((model_path+model_filename), monitor='val_loss',verbose=1, save_best_only=True)
model.summary()


# Train on the generator batches; the step counts are the number of full
# batches that fit into the raw image counts.
# NOTE(review): `use_multiprocessing` is accepted by tf.keras 2.x fit(); newer
# Keras releases may reject it - confirm against the installed version.
history = model.fit(
     train_generator,
     steps_per_epoch = num_of_train_img_raw//batch_size,
     epochs = epochs, 
     validation_data = validation_generator, # relates to the validation data.
     validation_steps = num_of_val_img_raw//batch_size,
     callbacks=[model_checkpoint],
     use_multiprocessing = False)

根据上面的参考链接,我似乎需要先保存特征,然后按如下方式应用 t-SNE(这部分代码是从该链接复制粘贴的):

# Project the feature vectors to two dimensions with t-SNE.
# NOTE(review): `TSNE` and `features` are not defined in this snippet -
# presumably sklearn.manifold.TSNE and an (n_samples, n_features) array; confirm.
tsne = TSNE(n_components=2).fit_transform(features)


# scale and move the coordinates so they fit [0; 1] range
def scale_to_01_range(x):
    """Linearly rescale the values of ``x`` so they span the [0, 1] range.

    Args:
        x: Array-like of numbers (e.g. one column of t-SNE coordinates).

    Returns:
        A NumPy array of the same shape as ``x`` in which the minimum maps
        to 0 and the maximum maps to 1. When every value is identical the
        result is all zeros instead of NaN.

    Raises:
        ValueError: If ``x`` is empty (raised by ``np.min``).
    """
    # accept plain sequences as well as arrays
    x = np.asarray(x)

    # compute the distribution range
    lowest = np.min(x)
    value_range = np.max(x) - lowest

    # a constant input has zero range; dividing by 1 instead keeps the
    # result finite (all zeros) rather than producing NaN
    if value_range == 0:
        value_range = 1

    # move the distribution so that it starts from zero, then make it
    # fit [0; 1] by dividing by its range
    return (x - lowest) / value_range

# extract x and y coordinates representing the positions of the images on T-SNE plot
# extract x and y coordinates representing the positions of the images on T-SNE plot
tx = tsne[:, 0]
ty = tsne[:, 1]

# rescale both axes to [0; 1]
tx = scale_to_01_range(tx)
ty = scale_to_01_range(ty)


# initialize a matplotlib plot
fig = plt.figure()
ax = fig.add_subplot(111)

# for every class, we'll add a scatter plot separately
for label in colors_per_class:
    # find the samples of the current class in the data
    indices = [i for i, sample_label in enumerate(labels) if sample_label == label]

    # extract the coordinates of the points of this class only
    current_tx = np.take(tx, indices)
    current_ty = np.take(ty, indices)

    # convert the class color to matplotlib format (0-255 -> 0-1);
    # the `np.float` alias was removed in NumPy 1.24, the builtin `float`
    # is the equivalent dtype
    # assumes colors_per_class[label] is a flat RGB(A) sequence - TODO confirm
    color = np.array(colors_per_class[label], dtype=float) / 255

    # add a scatter plot with the corresponding color and label; the color is
    # passed as a single-row 2-D array so matplotlib cannot mistake a flat
    # RGB triple for per-point values to be colormapped
    ax.scatter(current_tx, current_ty, c=color.reshape(1, -1), label=label)

# build a legend using the labels we set previously
ax.legend(loc='best')

# finally, show the plot
plt.show()

如果你能帮我把这两件事联系起来,我将不胜感激.

推荐答案

您可以尝试以下方法:

Train your model

import tensorflow as tf
import pathlib

# Download and unpack the example flower photo archive, then wrap the returned
# path in a pathlib.Path.
dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
data_dir = tf.keras.utils.get_file('flower_photos', origin=dataset_url, untar=True)
data_dir = pathlib.Path(data_dir)

batch_size = 32

# Build a batched dataset from the image directory; every image is resized to
# 180x180. The dataset is later unpacked as (images, labels) pairs.
train_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir,
  seed=123,
  image_size=(180, 180),
  batch_size=batch_size)


# Small CNN: rescale pixels by 1/255, three Conv2D + MaxPooling2D stages,
# dropout, then Dense(128) and a final Dense(5) with no activation (logits).
# The Dense(128) layer is model.layers[-2].
model = tf.keras.Sequential([
  tf.keras.layers.Rescaling(1./255, input_shape=(180, 180, 3)),
  tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dense(5)
])

# from_logits=True matches the activation-free final Dense layer; the sparse
# loss takes integer class labels.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
epochs=10
# Train on the full dataset; no validation data is passed here.
history = model.fit(
  train_ds,
  epochs=epochs
)

Make predictions on last and second last layer of your model and visualize

from sklearn.manifold import TSNE
import numpy as np
from  matplotlib import pyplot as plt

# Feature extractor: same input as the trained model, output taken from its
# second-to-last layer.
model2 = tf.keras.Model(inputs=model.input, outputs=model.layers[-2].output)
# NOTE(review): despite the name, these images are drawn from train_ds.
test_ds = np.concatenate(list(train_ds.take(5).map(lambda x, y : x))) # get five batches of images and convert to numpy array
# Features to visualize, one vector per image.
features = model2(test_ds)
# Labels used for coloring are the model's predicted classes (argmax over the
# last axis of the model output), not the ground-truth labels.
labels = np.argmax(model(test_ds), axis=-1)
# Reduce the feature vectors to two dimensions with t-SNE.
tsne = TSNE(n_components=2).fit_transform(features)

def scale_to_01_range(x):
    """Linearly rescale the values of ``x`` so they span the [0, 1] range.

    Args:
        x: Array-like of numbers (e.g. one column of t-SNE coordinates).

    Returns:
        A NumPy array of the same shape as ``x`` in which the minimum maps
        to 0 and the maximum maps to 1. When every value is identical the
        result is all zeros instead of NaN.

    Raises:
        ValueError: If ``x`` is empty (raised by ``np.min``).
    """
    x = np.asarray(x)
    lowest = np.min(x)
    value_range = np.max(x) - lowest
    # A constant input has zero range; divide by 1 so the result stays
    # finite (all zeros) instead of becoming NaN.
    if value_range == 0:
        value_range = 1
    return (x - lowest) / value_range

# Split the 2-D embedding into its coordinates and rescale each axis to [0, 1].
tx = tsne[:, 0]
ty = tsne[:, 1]

tx = scale_to_01_range(tx)
ty = scale_to_01_range(ty)

colors = ['red', 'blue', 'green', 'brown', 'yellow']
classes = train_ds.class_names
print(classes)
fig = plt.figure()
ax = fig.add_subplot(111)
# Pair each class with a color. zip() stops at the shorter sequence, so a
# dataset with fewer classes than colors (e.g. two classes) no longer raises
# IndexError on classes[idx].
label_ids = np.asarray(labels)
for idx, (class_name, c) in enumerate(zip(classes, colors)):
    # boolean mask of the samples assigned to this class index
    mask = label_ids == idx
    ax.scatter(tx[mask], ty[mask], c=c, label=class_name)

ax.legend(loc='best')
plt.show()

enter image description here

model2 输出您想要可视化的特征,而 model 在 np.argmax 的帮助下输出预测的类别.此外,本例使用的数据集有 5 个类,这就是为什么有 5 种不同的颜色.在你的例子中,你只有两个类,因此只需要两种颜色.

Python相关问答推荐

使用子字符串动态更新Python DataFrame中的列

将大小为n*512的数组绘制到另一个大小为n*256的数组的PC组件

当测试字符串100%包含查询字符串时,为什么 fuzzywuzzy 的 process.extractBests 不给出100%分数?

将numpy矩阵映射到字符串矩阵

覆盖Django rest响应,仅返回PK

Python中的负前瞻性regex遇到麻烦

如何让 turtle 通过点击和拖动来绘制?

提取两行之间的标题的常规表达

如何在图片中找到这个化学测试条?OpenCV精明边缘检测不会绘制边界框

使用 Python pandas 向量化多个头寸的止损/止盈回溯测试

在Mac上安装ipython

我对我应该做什么以及我如何做感到困惑'

如何从pandas的DataFrame类继承并使用filepath实例化

Pandas DataFrame中行之间的差异

使用Python更新字典中的值

如何指定列数据类型

* 动态地 * 修饰Python中的递归函数

找到相对于列表索引的当前最大值列表""

在代码执行后关闭ChromeDriver窗口

基于Scipy插值法的三次样条系数