准备工作
环境
由于手动安装各种依赖非常麻烦,不如直接使用anaconda一把梭来得方便。
# disable auto_active conda config --set auto_activate_base false # active an environment conda activate <spacename> # add mirrors conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/msys2/ conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/ conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ # create # conda create -n pytorch python=3.7 # conda activate pytorch conda install pytorch torchvision cpuonly -c pytorch conda install opencv-python conda install scipy
装完之后每次开机默认进conda环境比较烦,就把它关掉了。
数据集下载
由于cornell大学在国外,数据下载速度可达1kb/s,而且经常connection reset by peer/connection closed by remote host。原本代码里每运行一遍就下载一次是不可能实现的。只好先把数据爬下来,放到localhost上用。
import os
import urllib.request

import requests
from bs4 import BeautifulSoup


def listFD(url, ext=''):
    """Return the URLs of all links on the directory-listing page `url`.

    :param url: address of an HTML page containing ``<a href=...>`` links
    :param ext: only links whose href ends with this suffix are returned
    :return: list of ``url + '/' + href`` strings, in page order
    """
    page = requests.get(url).text
    soup = BeautifulSoup(page, 'html.parser')
    links = []
    for node in soup.find_all('a'):
        href = node.get('href')
        # Anchors without an href attribute yield None; skip them instead of
        # crashing on None.endswith().
        if href is not None and href.endswith(ext):
            links.append(url + '/' + href)
    return links


def url_to_image(url):
    """Download the image at `url` and decode it into an RGB array.

    NOTE(review): relies on `np` and `cv2` being imported elsewhere in the
    notebook; they are not imported by this snippet.
    """
    # download the image, convert it to a NumPy array, and then read
    # it into OpenCV format
    resp = urllib.request.urlopen(url)
    image = np.asarray(bytearray(resp.read()), dtype="uint8")
    image = cv2.imdecode(image, cv2.IMREAD_COLOR)
    # Convert BGR to RGB
    image = image[:, :, [2, 1, 0]]
    return image


def _download_all(urls, dest_dir):
    """Fetch every URL into `dest_dir`, skipping files that already exist."""
    os.makedirs(dest_dir, exist_ok=True)
    for url in urls:
        name = dest_dir + url.split('/')[-1]
        if os.path.exists(name):
            continue
        # Download to a temporary name first: with an unreliable connection an
        # interrupted transfer would otherwise leave a truncated file that the
        # exists() check above silently accepts on the next run.
        partial = name + '.part'
        urllib.request.urlretrieve(url, partial)
        os.replace(partial, name)


DATA_URL = "http://www.cs.cornell.edu/courses/cs5670/2019sp/projects/pa5/assignment_data/dataset/"
IMAGENET_LABELS_URL = "http://www.cs.cornell.edu/courses/cs5670/2019sp/projects/pa5/assignment_data/imagenet_classes.txt"
DOGS_DIR = "test-dog"
FOOD_DIR = "test-food"

food_paths = listFD(DATA_URL + FOOD_DIR, ".jpg")
dog_paths = listFD(DATA_URL + DOGS_DIR, ".jpg")

#-----------
_download_all(dog_paths, '/home/fffasttime/userres/cv/data/test-dog/')

#-----------
_download_all(food_paths, '/home/fffasttime/userres/cv/data/test-food/')
# Download the reference ("expected") arrays used by the assignment's tests.
base_url=http://www.cs.cornell.edu/courses/cs5670/2019sp/projects/pa5/expected
for name in \
    compute_dscore_dimage-grad \
    normalized_sgd_with_momentum_update-grad \
    normalized_sgd_with_momentum_update-velocity \
    normalized_sgd_with_momentum_update-new_data \
    normalized_sgd_with_momentum_update-new_velocity \
    fooling_image_gradient-grad \
    class_visualization_gradient-cur_data \
    class_visualization_gradient-grad
do
    wget "$base_url/$name.npz"
done

# copy to server
scp -r ~/download/expected/ root@101.132.170.22:/var/www/html/res/cv/expected
实验内容
本实验使用训练好参数的AlexNet作为测试内容。和训练网络时有点不同,训练网络一般是对网络参数求偏导进行梯度下降更新。而本实验多是计算对输入的偏导以揭示网络的特性。
可视化
通过对AlexNet的可视化,可以直观地感受神经网络的工作过程。
def vis_square(data, title=None):
    """Tile a batch of maps into one roughly square grid image and plot it.

    Takes a Tensor of shape (n, K, height, width) and shows each
    (height, width) map in a grid of about sqrt(n) by sqrt(n) cells.

    :param data: 4-D tensor; when it has more than 3 channels every channel
        of every item is shown as its own single-channel map
    :param title: optional figure title
    """
    if data.size(1) > 3:
        data = data.view(-1, 1, data.size(2), data.size(3))

    # NOTE(review): to_numpy_image is defined elsewhere; presumably it returns
    # a NumPy array with the batch axis first - confirm.
    data = to_numpy_image(data)

    # normalize data for display; a constant input becomes all zeros instead
    # of dividing by zero and producing a NaN image
    lowest = data.min()
    value_range = data.max() - lowest
    data = data - lowest
    if value_range > 0:
        data = data / value_range

    # force the number of filters to be square
    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = (((0, n ** 2 - data.shape[0]), (0, 2), (0, 2))  # add some space between filters
               + ((0, 0),) * (data.ndim - 3))  # don't pad the last dimension (if there is one)
    data = np.pad(data, padding, mode='constant', constant_values=1)  # pad with ones (white)

    # tile the filters into an image
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
    data = data.squeeze()

    # plot it
    plt.figure(figsize=(8, 8))
    plt.imshow(data)
    plt.axis('off')
    if title:
        plt.title(title)


weights = model.features[0].weight.data
vis_square(weights, title="Visualizing filters in conv1")
这段代码画出了第一层卷积核conv1的内容,可以发现是对边缘、点、格子等特征的提取。
import functools

# Build a fresh model so that hooks registered earlier are gone
model = alexnet(pretrained=True, num_classes=1000)


def show_activations_hook(name, module, input, output):
    """Forward hook that plots the output of the layer called `name`.

    4-D outputs are drawn as a grid of maps via vis_square; any other output
    is flattened and drawn as a line plot.
    """
    print(f"Visualizing layer: {name}")
    caption = f"Activations on: {name}"
    if output.dim() != 4:
        # Non-4-D output: plot the flattened activations as a curve
        flat = output.data.view([-1]).cpu().numpy()
        plt.figure(figsize=(15, 3))
        plt.plot(flat)
        plt.title(caption)
        return
    # 4-D output (BxCxHxW): show every map as an image tile
    vis_square(output, caption)


# Attach the hook to every module exposed through the model's shortcuts
for name, module in model.shortcut_modules():
    module.register_forward_hook(functools.partial(show_activations_hook, name))

# The model works on minibatches, so a single image gets a leading batch
# axis of size 1 before being passed in
model_input = Variable(example_image[np.newaxis, ...])

# Forward pass; the registered hooks draw the plots as a side effect
class_activations = model(model_input)[0]
这段代码计算一只狗作为输入图像时,AlexNet各个层的响应情况。越靠后的层会提取越高级的特征。
Todo 1
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

model_urls = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
}

INPLACE = False


class AlexNet(nn.Module):
    """AlexNet with name-based access to selected intermediate modules."""

    def __init__(self, num_classes=1000):
        super().__init__()

        feature_layers = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=INPLACE),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=INPLACE),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=INPLACE),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=INPLACE),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=INPLACE),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*feature_layers)

        classifier_layers = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=INPLACE),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=INPLACE),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*classifier_layers)

        # Layer name -> position inside the corresponding Sequential.
        # "fc6" and "fc7" point at the ReLU modules that follow the first two
        # linear layers; "fc8" points at the final linear layer.
        feature_index = {
            "conv1": 0, "relu1": 1,
            "conv2": 3, "relu2": 4,
            "conv3": 6, "relu3": 7,
            "conv4": 8, "relu4": 9,
            "conv5": 10, "relu5": 11,
        }
        classifier_index = {"fc6": 2, "fc7": 5, "fc8": 6}

        self.module_shortcuts = {}
        for layer_name, idx in feature_index.items():
            self.module_shortcuts[layer_name] = self.features[idx]
        for layer_name, idx in classifier_index.items():
            self.module_shortcuts[layer_name] = self.classifier[idx]

    def __getitem__(self, layer_name):
        """Return the module registered under `layer_name`, or None."""
        return self.module_shortcuts.get(layer_name)

    def shortcut_modules(self):
        """Yield (name, module) pairs for every shortcut, in definition order."""
        yield from self.module_shortcuts.items()

    def forward(self, x):
        """Run the feature extractor, flatten per sample, then classify."""
        feature_maps = self.features(x)
        flattened = feature_maps.view(feature_maps.size(0), 256 * 6 * 6)
        return self.classifier(flattened)


def alexnet(pretrained=False, **kwargs):
    r"""Build an AlexNet (`"One weird trick..." <https://arxiv.org/abs/1404.5997>`_).

    Args:
        pretrained (bool): If True, load weights pre-trained on ImageNet
        **kwargs: forwarded to the AlexNet constructor

    Returns:
        The model, switched to eval mode.
    """
    model = AlexNet(**kwargs)
    if pretrained:
        print("Loading pre-trained weights from model zoo")
        state = model_zoo.load_url(model_urls['alexnet'])
        model.load_state_dict(state)
    model.eval()
    return model


model = alexnet(pretrained=True, num_classes=1000)
上述代码定义了AlexNet的网络结构。问:为什么fc6和fc7层输出的结果一定是非负的?因为module_shortcuts中的fc6、fc7指向的是全连接层之后的ReLU模块(classifier[2]和classifier[5]),而ReLU的输出\(\max(0, x)\)当然不会是负数。
Todo 2 分类
def convert_ilsvrc2012_probs_to_dog_vs_food_probs(probs_ilsvrc):
    """
    Collapse 1000-class ILSVRC probabilities into 2-class "dog vs food"
    probabilities.

    The probability mass of all dog classes and of all food classes is summed
    per item, and the two sums are then renormalized so each row adds up to 1.

    :param probs_ilsvrc: shape (N, 1000) probabilities across 1000 ILSVRC classes
    :return probs: shape (N, 2): probabilities of each of the N items as being
        either dog (class 0) or food (class 1).
    """
    # in the ILSVRC2012 dataset, indices 151-268 are dogs and index 924-969 are foods
    dog_indices = range(151, 269)
    food_indices = range(924, 970)

    N, _ = probs_ilsvrc.shape
    probs = np.zeros((N, 2))

    ############################ TODO 2 BEGIN #################################
    class_ranges = (dog_indices, food_indices)
    for column, indices in enumerate(class_ranges):
        block = probs_ilsvrc[:, indices.start:indices.stop]
        probs[:, column] = block.sum(axis=1)

    # Renormalize each row over the two classes
    row_totals = probs.sum(axis=1, keepdims=True)
    probs /= row_totals
    ############################ TODO 2 END #################################
    return probs
# Build a fresh model so that hooks registered earlier are gone
model = alexnet(pretrained=True, num_classes=1000)

N = len(full_dataset)
dataloader = DataLoader(
    full_dataset,
    batch_size=1024,
    shuffle=False,
    num_workers=0,
    pin_memory=False,
    drop_last=False,
)
assert len(dataloader) == 1, "Since batch_size is bigger than the number of examples, we should only have one batch"

# The loader holds a single batch, so this loop body runs once
for batch in dataloader:
    images, labels = batch[0], batch[1]
    labels_np = labels.numpy()
    class_activations = model(Variable(images))
    ilsvrc_class_probs = F.softmax(class_activations, dim=1)
    # Move the probabilities into a NumPy array
    ilsvrc_class_probs_np = ilsvrc_class_probs.data.cpu().numpy()

dogfood_class_probs_np = convert_ilsvrc2012_probs_to_dog_vs_food_probs(ilsvrc_class_probs_np)

# Sanity checks: one row per sample, each row summing to 1
assert list(dogfood_class_probs_np.shape) == [N, 2]
row_sums = np.sum(dogfood_class_probs_np, axis=1)
np.testing.assert_almost_equal(row_sums, np.ones(N), decimal=5)
print("Seems correct!")

# Overall accuracy
predicted_class = np.argmax(dogfood_class_probs_np, axis=1)
correct_mask = predicted_class == labels_np
num_correct = np.sum(correct_mask)
accuracy = 100.0 * float(num_correct) / N
print(f"Overall accuracy {accuracy} {num_correct} / {N}")

# Per-class accuracy
for cidx, cname in enumerate(classes):
    cls_mask = labels_np == cidx
    predicted_cls = predicted_class[cls_mask]
    num_correct = np.sum(predicted_cls == cidx)
    cls_acc = 100.0 * float(num_correct) / cls_mask.sum()
    print(f"{cname} class accuracy {cls_acc} {num_correct} / {cls_mask.sum()}")
Overall accuracy 93.10344827586206 54 / 58 dog class accuracy 90.9090909090909 30 / 33 food class accuracy 96.0 24 / 25
使用AlexNet来区分食物和狗,达到93%的准确率。
测试集中包含了几张带有迷惑性的图片,例如把狗套上热狗,或者把食物做成动物的形状。AlexNet多数情况下未被欺骗,说明神经网络有较好的鲁棒性。
Todo 3 排序
将分类器输出排序,以便按置信度从高到低显示分类结果。分类器一般对正确分类的图片置信度达到99%。
def get_prediction_descending_order_indices(probs, cidx):
    """
    Return the row indices of `probs` ordered by descending probability of
    class `cidx`. Rows with equal probability keep their original order.

    :param probs: (N, 2) probabilities (computed in TODO 2)
    :param cidx: class index (0 or 1)
    :return list of N indices that sorts the array in descending order
    """
    ############################ TODO 3 BEGIN #################################
    ranked = sorted(
        ((probs[row][cidx], row) for row in range(probs.shape[0])),
        key=lambda pair: pair[0],
        reverse=True,
    )
    order = [row for _, row in ranked]
    ############################ TODO 3 END #################################
    return order


#-----------
for cidx, cname in enumerate(classes):
    print(f"Predictions for class: {cname}")

    # Restrict everything to the samples whose true label is this class
    cls_mask = labels_np == cidx
    predicted_cls = predicted_class[cls_mask]
    cls_probs = dogfood_class_probs_np[cls_mask]
    correct = [p == cidx for p in predicted_cls]
    cls_images = [images[i] for i in range(len(images)) if cls_mask[i]]

    order = get_prediction_descending_order_indices(cls_probs, cidx)
    assert len(order) == cls_mask.sum()

    # Show the images from most to least confident
    sorted_images = [cls_images[i] for i in order]
    sorted_correct = [correct[i] for i in order]
    sorted_titles = [f"Prob: {cls_probs[i, cidx]}" for i in order]
    show_images(
        images=sorted_images,
        correct_list=sorted_correct,
        titles=sorted_titles,
        size=128,
    )
Todo 4 可视化显著特征
求类别得分对输入图像的偏导\(\frac{\partial s_y}{\partial I}\),可以得到图像中能使输出显著变化的区域,也就是网络所关注的显著特征。
def compute_dscore_dimage(scores, image, class_idx):
    """
    Return the gradient of s_y (the score at index class_idx) with respect to
    the image, ds_y / dI.

    Note that this is the unnormalized class score "s", not the probability "p".

    The gradient is computed with torch.autograd.grad rather than
    scores[class_idx].backward(): backward() accumulates into image.grad (and
    into the .grad of every model parameter), so calling it again for the same
    image would return the sum of all gradients computed so far. As a
    consequence image.grad is not populated by this function.

    :param scores: (Variable) shape (1000) the output scores from AlexNet for image
    :param image: (Variable) shape (1, 3, 224, 224) the input image; must
        require grad and be part of the graph that produced `scores`
    :param class_idx: class index in range [0, 999] indicating which class to
        compute saliency for
    :return grad: (Tensor) shape (3, 224, 224), gradient ds_y / dI
    """
    ############################ TODO 4 BEGIN #################################
    (grad,) = torch.autograd.grad(scores[class_idx], image)
    ############################ TODO 4 END #################################

    assert tuple(grad.shape) == (1, 3, 224, 224)  # expected shape

    return grad[0]
model = alexnet(pretrained=True)


def visualize_saliency(image):
    """Return a (H, W) saliency map for the model's top-scoring class.

    The map is the squared gradient of the top class score with respect to
    the image, maximized over the channel axis.
    """
    batched = Variable(image.unsqueeze(0), requires_grad=True)
    cls_scores = model(batched)[0]
    _, top_idx = torch.max(cls_scores, 0)
    grad = compute_dscore_dimage(cls_scores, batched, top_idx)
    squared = grad * grad
    saliency, _ = torch.max(squared, 0)
    return saliency


class_datasets = [dog_dataset, food_dataset]
num_images = 6

for cidx, cname in enumerate(classes):
    print(f"Saliency for class: {cname}")
    in_images, vis_images = [], []

    for i, sample in enumerate(class_datasets[cidx]):
        image_i, label_i = sample[0], sample[1]
        vis_image = visualize_saliency(image_i)
        assert list(vis_image.shape) == [224, 224]
        vis_images.append(vis_image.unsqueeze(0))
        in_images.append(image_i)
        # NOTE(review): the check runs after the append, so up to
        # num_images + 1 samples are collected per class.
        if i >= num_images:
            break

    # One row per sample: the input image next to its saliency map
    show_image_rows(list(zip(in_images, vis_images)))

    cls_mask = labels_np == cidx
    predicted_cls = predicted_class[cls_mask]
    cls_probs = dogfood_class_probs_np[cls_mask]
Todo 5 迷惑行为
虽然神经网络大部分情况下能很好地识别图像,但如果在图像上加一个很小的扰动,就能严重欺骗神经网络,使之能很自信地输出错误结果。
这样的输入图像只要在相同的网络上修改一下损失,就很容易训练出来了。
$$L=-s_y(I) + R(I) \\ R(I)=0.5\lambda \|I-I_{orig}\|_2^2$$
这个损失表示:在使网络输出尽可能偏向(错误的)目标类别的同时,让图像与原图尽可能相似。
计算这个I的过程和训练神经网络完全相同,可以使用各种优化方式求解。这里使用附加动量的梯度下降法计算。注意实验test中的lr实际应为45,否则无法通过数据测试。
def normalized_sgd_with_momentum_update(image, grad, velocity, momentum, learning_rate):
    """
    Take one step of SGD with momentum along the L2-normalized gradient:

        V <- mu * V - alpha * grad / ||grad||
        I <- I + V

    The norm is computed in torch (no NumPy round-trip), so the update also
    works for tensors that do not live on the CPU. A zero gradient contributes
    a zero step instead of a NaN.

    :param image: (Variable) shape (1, 3, 224, 224) the current solution
    :param grad: (Variable) gradient of the loss with respect to the image
    :param velocity: (Variable) momentum vector "V"
    :param momentum: (float) momentum parameter "mu"
    :param learning_rate: (float) learning rate "alpha"
    :return: (Variable) the updated image and momentum vector (image, velocity)
    """
    ############################ TODO 5a BEGIN #################################
    # Clamping to the smallest positive normal float leaves every non-zero
    # norm untouched and only prevents 0 / 0.
    smallest = torch.finfo(grad.dtype).tiny
    grad_norm = grad.detach().norm().clamp_min(smallest)
    velocity = momentum * velocity - learning_rate * (grad / grad_norm)
    image = image + velocity
    ############################ TODO 5a END #################################
    return image, velocity


def fooling_image_gradient(target_score, orig_data, image_in, target_class, reg_lambda):
    """
    Compute the gradient for make_fooling_image (dL / dI), where

        L = -s_target(I) + 0.5 * lambda * ||I - I_orig||_2^2

    The squared norm is computed directly as a sum of squares, which avoids
    differentiating a square root at zero (I equals I_orig on the first
    iteration). The gradient is obtained with torch.autograd.grad, so nothing
    is accumulated into image_in.grad or into the model parameters' .grad.

    :param target_score: (Variable) holding the current score assigned to the target class
    :param orig_data: (Variable) shape (1, 3, 224, 224) holding the original image
    :param image_in: (Variable) shape (1, 3, 224, 224) holding the current solution
    :param target_class: (int) ILSVRC class in range [0, 999] (not used here)
    :param reg_lambda: (float) weight applied to the regularizer.
    :return grad: (Variable) gradient dL / dI
    """
    ############################ TODO 5b BEGIN #################################
    difference = image_in - orig_data
    loss = -target_score + 0.5 * reg_lambda * difference.pow(2).sum()
    (grad,) = torch.autograd.grad(loss, image_in)
    ############################ TODO 5b END #################################

    assert tuple(grad.shape) == (1, 3, 224, 224)  # expected shape

    return grad
model = alexnet(pretrained=True)
# Set the model to eval mode
model.eval()


def make_fooling_image(image, target_class, learning_rate, regularization, num_iter, momentum, threshold=0.9):
    """
    Fool AlexNet into thinking that any image has a particular class,
    by perturbing it just a little bit.

    :param image: starting image CxHxW tensor
    :param target_class: the class that this will become after optimization
    :param learning_rate: either a constant, or a function that is called with
        the 0-based iteration index and returns the learning rate to use
    :param regularization: lambda parameter to multiply the regularizer
    :param num_iter: maximum number of iterations
    :param momentum: amount of momentum to use in the SGD
    :param threshold: optimization stops once the softmax probability of
        target_class exceeds this value
    :return: (image_in, delta) the fooling image with a batch axis, and its
        difference from the original image
    """
    # Create batch dimension and turn into a variable
    image = image[np.newaxis, ...]

    print(f"Fooling AlexNet into thinking this is a: {labels_id2word[target_class]}")

    # This is the original image (used by the regularizer)
    orig_data = Variable(image.clone())

    image_in = Variable(image, requires_grad=True)
    velocity = torch.zeros_like(image_in)

    for i in range(num_iter):
        # The docstring allows a learning-rate schedule; resolve it here
        step_size = learning_rate(i) if callable(learning_rate) else learning_rate

        curr_scores = model(image_in)[0]
        curr_probs = F.softmax(curr_scores, 0)
        target_prob = curr_probs[target_class]
        target_score = curr_scores[target_class]

        # compute the gradient
        grad_wrt_image = fooling_image_gradient(
            target_score, orig_data, image_in, target_class, regularization)

        # update the image with the SGD rule
        image_in, velocity = normalized_sgd_with_momentum_update(
            image_in, grad_wrt_image, velocity, momentum, step_size)

        # Detach the image and velocity so that we don't backprop through
        # multiple iterations of the loop
        image_in = Variable(image_in.data, requires_grad=True)
        velocity = Variable(velocity.data, requires_grad=False)

        # Zero the gradients
        model.zero_grad()

        # Take the target probability out of the variable (turn it into float)
        target_prob = target_prob.data.item()

        # visualize the current state
        print(f"({i+1}/{num_iter}), {target_prob * 100} confidence")

        if target_prob > threshold:
            break

    delta = (image_in - orig_data).data
    return image_in, delta


num_images = 2
target_class = 113

for cidx, cname in enumerate(classes):
    dataset = class_datasets[cidx]
    images_in = []
    fooling_images = []
    deltas = []

    # `sample` rather than `input`, to avoid shadowing the builtin
    for i, sample in enumerate(dataset):
        image_in = sample[0]

        fooling_image, delta = make_fooling_image(
            image_in,
            target_class=target_class,
            learning_rate=1e-1,
            regularization=5e-5,
            num_iter=100,
            momentum=0.9)

        # Magnify the perturbation and center it on gray for display
        delta = 0.5 + (5.0 / 255.0) * delta

        images_in.append(image_in)
        fooling_images.append(fooling_image)
        deltas.append(delta)

        # NOTE(review): the check runs after the append, so up to
        # num_images + 1 samples are processed per class.
        if i >= num_images:
            break

    print("\nLeft: original, middle: fooling image, right: difference magnified by 5x (gray is 0).\n"
          "AlexNet will classify the middle image in each row as %r with high confidence" % (
              labels_id2word[target_class]))
    show_image_rows(list(zip(images_in, fooling_images, deltas)))
Fooling AlexNet into thinking this is a: snail (1/100), 0.5831706803292036 confidence (2/100), 0.6689718458801508 confidence (3/100), 0.8640774525702 confidence (4/100), 1.2419680133461952 confidence (5/100), 1.8803991377353668 confidence (6/100), 2.9153700917959213 confidence (7/100), 4.516325891017914 confidence (8/100), 6.986626237630844 confidence (9/100), 10.528770089149475 confidence (10/100), 15.72534590959549 confidence (11/100), 22.0824733376503 confidence (12/100), 29.726800322532654 confidence (13/100), 37.88847029209137 confidence (14/100), 46.511444449424744 confidence (15/100), 55.062055587768555 confidence (16/100), 63.66371512413025 confidence (17/100), 71.66877388954163 confidence (18/100), 78.63744497299194 confidence (19/100), 84.36155915260315 confidence (20/100), 88.38014006614685 confidence (21/100), 91.15805625915527 confidence Left: original, middle: fooling image, right: difference magnified by 5x (gray is 0). AlexNet will classify the middle image in each row as 'snail' with high confidence
思考:这完全是一个“以子之矛,攻子之盾”的思想。一方面揭示了神经网络仍具有脆弱性,可能受到攻击。另一方面,设想一下如果把这个过程反复迭代进行会发生什么?这将得到一个生成式对抗网络!也许GAN的神奇灵感就是从这里迸发的吧。
Todo 6 类别可视化
能让神经网络分类为某个类别的图像都有什么特征? 这样的图像也可以用和上面类似的方式训练出来。 和上面唯一不同的是,这里的损失中\(R(I)=0.5 \lambda\|I\|_2^2\) ,即生成尽可能简单的图样。
def class_visualization_gradient(target_score, image, target_class, reg_lambda):
    """
    Compute the gradient for make_class_visualization (dL / dI), where

        L = -s_target(I) + 0.5 * lambda * ||I||_2^2

    The squared norm is computed directly as a sum of squares (no square root
    to differentiate), and the gradient is obtained with torch.autograd.grad,
    so repeated calls on the same image do not accumulate into image.grad and
    the model parameters' .grad is left untouched.

    :param target_score: (Variable) holding the current score assigned to the target class
    :param image: (Variable) shape (1, 3, 224, 224) the current solution
    :param target_class: (int) ILSVRC class in range [0, 999] (not used here)
    :param reg_lambda: (float) weight (lambda) applied to the regularizer.
    :return grad: (Variable) gradient dL / dI
    """
    ############################ TODO 6 BEGIN #################################
    loss = -target_score + 0.5 * reg_lambda * image.pow(2).sum()
    (grad,) = torch.autograd.grad(loss, image)
    ############################ TODO 6 END #################################

    assert tuple(grad.shape) == (1, 3, 224, 224)  # expected shape

    return grad
model = alexnet(pretrained=True)
# Set the model to eval mode
model.eval()


def make_class_visualization(target_class, learning_rate, regularization, num_iter, max_jitter,
                             blur_sigma, momentum):
    """
    Visualize an ILSVRC2012 class by maximizing the score of that class,
    starting from a random image.

    :param target_class: what ILSVRC2012 class to visualize
    :param learning_rate: either a constant, or a function that is called with
        the 0-based iteration index and returns the learning rate to use
    :param regularization: lambda parameter to multiply the regularizer
    :param num_iter: number of iterations
    :param max_jitter: amount of jitter to add for regularization
    :param blur_sigma: blur each iteration by this amount.
    :param momentum: amount of momentum to use in the SGD update
    :return: the generated image as a (3, 224, 224) tensor
    """
    image_np = 100 * np.random.randn(1, 3, 224, 224).astype(np.float32)
    vel_np = np.zeros_like(image_np)

    print(f"Generating visualization of: {labels_id2word[target_class]}...")

    for i in range(num_iter):
        # The docstring allows a learning-rate schedule; resolve it here
        step_size = learning_rate(i) if callable(learning_rate) else learning_rate

        # Random jitter to regularize: shift image and velocity together
        ox, oy = np.random.randint(-max_jitter, max_jitter+1, 2)
        image_np = np.roll(np.roll(image_np, ox, -1), oy, -2)
        vel_np = np.roll(np.roll(vel_np, ox, -1), oy, -2)

        image = Variable(torch.from_numpy(image_np), requires_grad=True)
        velocity = Variable(torch.from_numpy(vel_np), requires_grad=False)

        # Compute the current class score
        class_scores = model(image)[0]
        class_probs = F.softmax(class_scores, 0)
        target_score = class_scores[target_class]
        target_prob = class_probs[target_class]

        # Compute the gradient
        grad = class_visualization_gradient(
            target_score, image, target_class, regularization)

        # Normalized SGD+Momentum update
        image, velocity = normalized_sgd_with_momentum_update(
            image, grad, velocity, momentum, step_size)

        # Convert back to numpy
        image_np = image.data.cpu().numpy()
        vel_np = velocity.data.cpu().numpy()

        # Undo jitter
        image_np = np.roll(np.roll(image_np, -ox, -1), -oy, -2)
        vel_np = np.roll(np.roll(vel_np, -ox, -1), -oy, -2)

        # blur the image every iteration
        for c in range(3):
            image_np[0, c, ...] = gaussian_filter(image_np[0, c, ...], sigma=blur_sigma)

        # Convert to float
        target_prob = target_prob.data.item()
        target_score = target_score.data.item()

        # Visualize our current result
        print(f"({i+1}/{num_iter}), {target_prob * 100} % confidence, score: {target_score}")

    return torch.from_numpy(image_np)[0]
Generating visualization of: flamingo... (1/250), 0.0 % confidence, score: 81.66814422607422 (2/250), 0.0 % confidence, score: 79.51008605957031 (3/250), 0.0 % confidence, score: 96.74752044677734 (4/250), 0.0 % confidence, score: 89.8150405883789 (5/250), 0.0 % confidence, score: 139.24099731445312 (6/250), 0.0 % confidence, score: 116.5067138671875 (7/250), 0.0 % confidence, score: 129.0389404296875 (8/250), 0.0 % confidence, score: 159.90687561035156 (9/250), 0.0 % confidence, score: 113.67705535888672 (10/250), 0.0 % confidence, score: 117.25206756591797 (11/250), 0.0 % confidence, score: 151.54722595214844 (12/250), 0.0 % confidence, score: 180.6258087158203 (13/250), 0.0 % confidence, score: 206.93601989746094 (14/250), 0.0 % confidence, score: 187.57032775878906 (15/250), 0.0 % confidence, score: 204.2655029296875 (16/250), 0.0 % confidence, score: 187.85523986816406 (17/250), 0.0 % confidence, score: 275.1002197265625 (18/250), 0.0 % confidence, score: 231.91543579101562 (19/250), 6.850279492456855e-36 % confidence, score: 270.7643127441406 (20/250), 2.5737667279289767e-34 % confidence, score: 297.01470947265625 ... 
(230/250), 100.0 % confidence, score: 19858.681640625 (231/250), 100.0 % confidence, score: 21682.11328125 (232/250), 100.0 % confidence, score: 20583.8203125 (233/250), 100.0 % confidence, score: 19506.390625 (234/250), 100.0 % confidence, score: 20750.79296875 (235/250), 100.0 % confidence, score: 21494.609375 (236/250), 100.0 % confidence, score: 18931.51171875 (237/250), 100.0 % confidence, score: 20664.38671875 (238/250), 100.0 % confidence, score: 19632.5078125 (239/250), 100.0 % confidence, score: 20610.9140625 (240/250), 100.0 % confidence, score: 19816.77734375 (241/250), 100.0 % confidence, score: 19917.841796875 (242/250), 100.0 % confidence, score: 21733.951171875 (243/250), 100.0 % confidence, score: 21844.37890625 (244/250), 100.0 % confidence, score: 20790.75 (245/250), 100.0 % confidence, score: 21141.03125 (246/250), 100.0 % confidence, score: 21582.42578125 (247/250), 100.0 % confidence, score: 21072.703125 (248/250), 100.0 % confidence, score: 21452.84375 (249/250), 100.0 % confidence, score: 22731.75390625 (250/250), 100.0 % confidence, score: 21030.724609375