I'm very new to ML image manipulation/creation, so if I confuse you all with my own lack of knowledge on the subject, I apologize in advance.
I'm attempting to increase the resolution of images produced by DALL-E, inspired by this article:
https://towardsdatascience.com/big-art-using-machine-learning-to-create-high-res-fine-art-7dd695f99788
However, when I feed images from DALL-E into the "Generate 1K Image" section of the original author's code/Colab, my original image comes out very washed out. This is likely because the source feeds in a tensor produced by a different model, while in my version I convert an image to a (poorly made?) tensor in the same section and feed that in.
Here's what I have:
#@title Generate 1K Image
from google.colab import files
from io import BytesIO
from PIL import Image
from matplotlib import pyplot as plt
import numpy as np
import torch
from torchvision import transforms as T
import IPython
import os.path
import cv2

uploaded = files.upload()

texture_amount = 0.05 #@param {type:"slider", min:0, max:0.15, step:0.001}
texture_size = 3 #@param {type:"slider", min:1, max:9, step:2}
enhance_details = True #@param {type:"boolean"}

img = Image.open(BytesIO(uploaded['knight.png']))
plt.imshow(img)
plt.show()

transform = T.Compose([
    T.ToTensor()
])
tensorImage = transform(img)
selected_img = tensorImage.cuda()
selected_img = selected_img.type(torch.cuda.FloatTensor)
selected_img = selected_img.add(1).div(2)[None, :]

with torch.no_grad():
    torch.cuda.empty_cache()
    resized = bsrgan_model(selected_img)
    torch.cuda.empty_cache()
    noise = torch.normal(0, texture_amount,
                         size=[resized.shape[0], 1, resized.shape[2], resized.shape[3]]).to(device)
    noise = noise.repeat(1, 3, 1, 1)
    noise_blurred = T.GaussianBlur(kernel_size=texture_size, sigma=1)(noise)
    noise_blurred = noise*0.25 + noise_blurred*0.75
    resized = (resized + noise_blurred)

final_image = resized.to(device)

if enhance_details:
    with torch.no_grad():
        torch.cuda.empty_cache()
        z, *_ = vqgan_model.encode(final_image * 2 - 1)
        final_image = vqgan_model.decode(z)[0].add(1).div(2).clamp(min=0, max=1)
        torch.cuda.empty_cache()
    final_image = final_image.clamp(min=0, max=1)
else:
    final_image = final_image[0].clamp(min=0, max=1)

img = T.ToPILImage()(final_image)
img.save("output_1k.png")
IPython.display.Image("output_1k.png")
(Screenshots in the original post: the original image, followed by the washed-out resulting image.)
Any ideas as to how I can fix this issue are greatly, greatly appreciated!
Solution
You are averaging every pixel value with 1 in this line:
selected_img = selected_img.add(1).div(2)[None, :]
You should change it to:
selected_img = selected_img[None, :]
Explanation
(For a pixel, if R G B = 1 1 1 the pixel is white, and if R G B = 0 0 0 the pixel is black.) For example, if a pixel has R G B = 0 .5 1, the original line changes it to R G B = .5 .75 1 (each value averaged with 1), which pushes every value toward white and washes the image out. ToTensor already scales pixel values into [0, 1], so no extra rescaling is needed. You can check it with this code:
import torch
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt
from torchvision import transforms
fig, axs = plt.subplots(nrows=1, ncols=3, constrained_layout=True)
img = Image.open('knight.png')
axs[0].imshow(img)
axs[0].set_title('Original Image')
transform = transforms.Compose([
    transforms.ToTensor()
])
tensorImage = transform(img)
selected_img = tensorImage
selected_img = selected_img.type(torch.FloatTensor)
selected_img_0 = selected_img.add(1).div(2)[None, :]
axs[1].imshow(np.squeeze(selected_img_0).permute(1, 2, 0))
axs[1].set_title('Averaged Tensor Image')
selected_img_1 = selected_img[None, :]
axs[2].imshow(np.squeeze(selected_img_1).permute(1, 2, 0))
axs[2].set_title('Tensor Image')
plt.show()
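(If you run this check on one of the DALL-E outputs, the middle "Averaged Tensor Image" panel should look noticeably lighter than the right panel, which is exactly the washed-out symptom from the question.)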
Related
I have a problem with segmenting (or clustering?) pictures using their histograms; I don't really know how to approach it. I have, let's say, 200 images, and I have to group them (people with people, buildings with buildings, etc.).
If you want to work on exactly the same task as me, there is a source of images: http://wang.ist.psu.edu/docs/related/
I know how to get histograms; my code is below (with a detailed description).
import numpy as np
import matplotlib.pyplot as plt
import cv2
# 1. Loading images from folder
path = 'image\\350.jpg'
img = plt.imread(path)
imglist=[]
for i in range(0,20):
    x = np.random.randint(0,1000)
    path = "image\\"+str(x)+".jpg"
    # print(path)
    img = plt.imread(path)
    imglist.append(img)
# just testing if everything is fine
plt.figure(figsize = (20,10))
for i in range(0,20):
    plt.subplot(4,5,i+1)
    plt.imshow(imglist[i])
    plt.xticks([])
    plt.yticks([])
# 2. QUANTIZATION (FOR ONE IMAGE)
# QUANTIZATION TO 2 COLORS FOR EACH OF R, G, B --> 8 COLORS
imgq2 = np.floor(img/128)*128+64
imgq2 =imgq2.astype(int)
# QUANTIZATION TO 4 COLORS
imgq4 = np.floor(img/64)*64 + 32
imgq4 = imgq4.astype(int)
# QUANTIZATION TO 8 COLORS
imgq8 = np.floor(img/32)*32 + 16
imgq8 = imgq8.astype(int)
# TESTING
plt.figure(figsize = (20,10))
plt.subplot(1,3,1)
plt.imshow(img)
plt.title('Original image')
plt.subplot(1,3,2)
plt.imshow(imgq2)
plt.title('2 values of RGB')
plt.subplot(1,3,3)
plt.imshow(imgq4)
plt.title('4 values of RGB')
# checking if this is true
imgq4.shape
imgq8.shape
print(np.unique(imgq4[:,:,:]))
print(np.unique(imgq8[:,:,:]))
imgq4 = np.floor(img/128)
imgq4 = imgq4.astype(int)
# plt.imshow(imgq4)
# plt.show()
# 3. MAKING 3-Dim (RGB) HISTOGRAM
hist = {(i,j,k): 0 for i in range(0,4) for j in range(0,4) for k in range(0,4)}
print(hist)
img = plt.imread('image\\0.jpg')
img2 = img.copy()
img2 = np.floor(img2/64).astype(int)
img2.shape
img2 = img2.reshape( img2.shape[0]*img2.shape[1] ,3)
for i in range(0,img2.shape[0]):
    hist[img2[i,0], img2[i,1], img2[i,2]] = hist[img2[i,0], img2[i,1], img2[i,2]] + 1
print(len(hist.keys()), len(hist.values()))
# histogram values
print(hist.values())
print(img2.shape[0] == np.sum(list(hist.values())))
I also have this file, which performs clustering on random points:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits
no_of_points = 1000
X = np.random.rand(no_of_points,2)
plt.scatter(X[:,0],X[:,1],s = 50, cmap = 'rainbow')
kmeans = KMeans(n_clusters = 5)
kmeans.fit(X)
plt.scatter(X[:,0], X[:,1], s = 50, cmap = 'rainbow', c = kmeans.labels_)
plt.scatter(kmeans.cluster_centers_[:,0],kmeans.cluster_centers_[:,1],s = 100, c = 'black', alpha = 0.5)
plt.show()
To sum up, I have two questions.
Is it possible to transform these histograms (or vectors) into something I could cluster, as in my second file?
How can I automate the process of building an image histogram? (Right now it's done for just one image.)
Thanks for the help!
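Not part of the original post, but here is a minimal sketch of one way to handle both questions at once, reusing the 4-bins-per-channel histogram from the code above (the image\NNN.jpg file layout is assumed from that code): wrap the histogram in a function that returns one normalized vector per image, stack the vectors into a feature matrix, and hand it to KMeans exactly as in the second file.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

def rgb_histogram(img, bins=4):
    # Quantize each channel to `bins` levels and count every (R, G, B)
    # bin combination, giving a vector of bins**3 entries per image.
    q = np.floor(img / (256 // bins)).astype(int).reshape(-1, 3)
    flat = q[:, 0] * bins * bins + q[:, 1] * bins + q[:, 2]
    hist = np.bincount(flat, minlength=bins ** 3).astype(float)
    return hist / hist.sum()  # normalize so image size does not matter

# One histogram row per image -> a feature matrix KMeans can cluster.
features = np.array([rgb_histogram(plt.imread("image\\" + str(i) + ".jpg"))
                     for i in range(20)])
labels = KMeans(n_clusters=5).fit_predict(features)
print(labels)  # cluster id per image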
I get an error when trying to find the two circles, the inner one for the pupil and the outer one for the iris, but am unable to do so. First I reshape the image, then I estimate the bandwidth to get the kernel value, then I segment using the mean shift algorithm, and then I mark the cluster region in red:
import tkinter as tk
from tkinter import filedialog
from PIL import ImageTk,Image
import numpy as np
import scipy.ndimage as snd
from sklearn.cluster import MeanShift, estimate_bandwidth
from sklearn.datasets.samples_generator import make_blobs
from itertools import cycle
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import cv2
pylab.rcParams['figure.figsize'] = 16, 12
root = tk.Tk()
root.withdraw()
file_path = filedialog.askopenfilename(initialdir=r"F:\mean shift\images", title="Open File", filetypes=(("all files", "*.*"), ("jpg files", "*.jpg")))
image = Image.open(file_path)
image = np.array(image)
original_shape = image.shape
# Flatten image.
X = np.reshape(image, [-1, 3])
plt.imshow(image)
bandwidth = estimate_bandwidth(X, quantile=0.1, n_samples=100)
print(bandwidth)
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
ms.fit(X)
labels = ms.labels_
print(labels.shape)
cluster_centers = ms.cluster_centers_
print(cluster_centers.shape)
labels_unique = np.unique(labels)
n_clusters_ = len(labels_unique)
print("number of estimated clusters : %d" % n_clusters_)
segmented_image = np.reshape(labels, original_shape[:2]) # Just take size, ignore RGB channels.
plt.figure(2)
plt.imshow(segmented_image)
plt.axis('off')
masked_image = np.copy(image)
# convert to the shape of a vector of pixel values
masked_image = masked_image.reshape((-1, 3))
# color (i.e cluster) to disable
cluster = 2
masked_image[labels == cluster] = [255, 0, 0]
# convert back to original shape
masked_image = masked_image.reshape(image.shape)
# show the image
plt.imshow(masked_image)
nemo = cv2.cvtColor(masked_image, cv2.COLOR_BGR2RGB)
cv2.imwrite("mean_shift.bmp",nemo)
plt.show()
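Not part of the original question, but since the end goal is the two circles, a common next step is a circular Hough transform on the blurred grayscale image. Below is a minimal sketch; every parameter value is a guess that would need tuning per image.
import cv2
import numpy as np

gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)  # `image` from the code above
gray = cv2.medianBlur(gray, 5)  # suppress iris texture before edge detection

# HOUGH_GRADIENT looks for circular edges; the radius range brackets
# the expected pupil and iris sizes.
circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, dp=1, minDist=20,
                           param1=100, param2=30, minRadius=10, maxRadius=150)
if circles is not None:
    for x, y, r in np.round(circles[0]).astype(int):
        cv2.circle(image, (int(x), int(y)), int(r), (255, 0, 0), 2)
plt.imshow(image)
plt.show()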
I'm trying to add random noise, drawn from a uniform distribution between the minimum pixel value and 0.1 times the maximum pixel value, to each pixel of each channel of the original image.
Here's my code so far:
[in]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Read image with cv2
image = cv2.imread('example_image.jpg' , 1)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Display image
plt.imshow(image_rgb)
# R,G,B channel separation
R, G, B = cv2.split(image_rgb)
# Creating Noise
noise_R = np.random.uniform(R.min(),R.max()*0.1, R.size)
noise_R.shape = (256,256)
noise_G = np.random.uniform(B.min(),B.max()*0.1, G.size)
noise_G.shape = (256,256)
noise_B = np.random.uniform(G.min(), G.max()*0.1, B.size)
noise_B.shape = (256,256)
# Adding noise to each channel separately
R = R + noise_R
G = G + noise_G
B = B + noise_B
rgb_noise = R + G + B
noisy_image = image + rgb_noise
[out]:
ValueError: operands could not be broadcast together with shapes (256,256,3) (256,256)
I'm getting a ValueError because the array shapes of rgb_noise and image are not equal. I've tried changing the shape of rgb_noise to match image's, but then I get a size error. How can I fix it? Is there a better method?
Your solution is a bit verbose and could be made more compact.
However, the reason you do not get white-ish noise is that you compute your red channel differently from the other two.
Change this:
noise_R = np.random.uniform(R_min,R_max*0.3, image_G.size)
to this:
noise_R = np.random.uniform(R_min,R_max*0.1, image_R.size)
You can keep it simple and add the noise using only NumPy arrays.
import numpy as np
import matplotlib.pyplot as plt
import cv2
Note that plotting the image will only display inline in Jupyter notebooks; use cv2.imshow() in other IDEs.
1) Have your Image
img = cv2.imread('path').astype(np.uint8)
2) Make a random noise
h, w, c = img.shape  # height, width, channels
noise = np.random.randint(0, 255, (h, w, c), dtype=np.uint8)  # uint8 so addWeighted accepts it
3) Blend them
image_with_noise = cv2.addWeighted(img,0.5,noise,0.5,0)
You can adjust the alpha and beta values.
There you have a noisy image!
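For what it's worth, a minimal sketch of a direct fix for the original broadcast error (my own suggestion, not from either answer): merge the three noisy channels back into an H x W x 3 image instead of summing them into a single 2-D array.
# After R = R + noise_R (and likewise G, B) in the question's code, merge
# the channels back together; R + G + B collapses them into one 2-D array,
# which cannot broadcast against the (256, 256, 3) image.
noisy_image = cv2.merge([
    np.clip(R, 0, 255).astype(np.uint8),
    np.clip(G, 0, 255).astype(np.uint8),
    np.clip(B, 0, 255).astype(np.uint8),
])
plt.imshow(noisy_image)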
This is basically the same question that was posted here: How to merge a transparent png image with another image using PIL, but using scikit-image instead of PIL. I want to paste the png on top of a background image while keeping its transparency. Also, if there is actually a way of doing it, I would like to know which is faster (PIL or scikit-image). Thanks.
Read the two images and blend them using the formula img1*alpha + img2*(1-alpha):
import numpy as np
from matplotlib import pyplot as plt
import skimage.io
img1 = skimage.io.imread('Desert.jpg')
img2 = skimage.io.imread('Penguins.jpg')
img3 = np.ubyte(0.7*img1 + 0.3*img2)
plt.imshow(img3)
Another option could be to use the alpha channels of the two images as masks, as below (note that this bitwise approach assumes a hard 0/255 alpha channel):
import numpy as np
from matplotlib import pyplot as plt
import skimage.io
img1 = skimage.io.imread('img1.png')
img2 = skimage.io.imread('img2.png')
mask1 = img1.copy()
mask2 = img2.copy()
mask1[:,:,0] = mask1[:,:,3]
mask1[:,:,1] = mask1[:,:,3]
mask1[:,:,2] = mask1[:,:,3]
mask2[:,:,0] = mask2[:,:,3]
mask2[:,:,1] = mask2[:,:,3]
mask2[:,:,2] = mask2[:,:,3]
img3 = np.bitwise_or(np.bitwise_and(img1, mask1), np.bitwise_and(img2, mask2))
plt.subplot(2,2,1)
plt.imshow(img1)
plt.subplot(2,2,2)
plt.imshow(img2)
plt.subplot(2,2,3)
plt.imshow(img3)
Inspired by user8190410's answer, I built my own function to do it:
from skimage import data
from matplotlib import pyplot as plt
import numpy as np
x, y = 100, 100
background = data.imread('background.jpg') / 255.
image = data.imread('image.png') / 255.
background_height, background_width, background_depth = background.shape
image_height, image_width, image_depth = image.shape
template = np.zeros((background_height, background_width, image_depth))
template[y : y + image_height, x : x + image_width, :] = image
mask = np.stack([template[:,:,3] for _ in range(3)], axis = 2)
inv_mask = 1. - mask
result = background[:,:,:3] * inv_mask + template[:,:,:3] * mask
plt.figure(figsize = (15, 15))
plt.subplot(1, 3, 2)
plt.imshow(image)
plt.subplot(1, 3, 1)
plt.imshow(background)
plt.subplot(1, 3, 3)
plt.imshow(result)
plt.tight_layout()
plt.show()
Please let me know if I can do something to improve computation speed
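One small speed-up worth trying (an untested suggestion on my part): let NumPy broadcast the alpha channel instead of stacking three copies of it.
# template[:, :, 3:4] has shape (H, W, 1), which broadcasts against
# (H, W, 3), so no np.stack copy of the mask is needed.
alpha = template[:, :, 3:4]
result = background[:, :, :3] * (1. - alpha) + template[:, :, :3] * alpha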
I am trying to implement adaptive histogram equalization in Python. I take an image, split it into smaller regions, apply traditional histogram equalization to each, and then combine the regions into one final image. The final image appears very blocky and has a different contrast level in each region. Is there a way to maintain uniform contrast across the regions so that the result looks like a single image instead of smaller images stitched together?
import cv2
import numpy as np
from matplotlib import pyplot as plt
from scipy.misc import imsave
from scipy import ndimage
from scipy import misc
import scipy.misc
import scipy
import image_slicer
from image_slicer import join
from PIL import Image
img = 'watch.png'
num_tiles = 25
tiles = image_slicer.slice(img, num_tiles)
for tile in tiles:
    img = scipy.misc.imread(tile.filename)
    hist, bins = np.histogram(img.flatten(), 256, [0, 256])
    cdf = hist.cumsum()
    cdf_normalized = cdf * hist.max() / cdf.max()
    plt.plot(cdf_normalized, color='g')
    plt.hist(img.flatten(), 256, [0, 256], color='g')
    plt.xlim([0, 256])
    plt.legend(('cdf', 'histogram'), loc='upper left')
    cdf_m = np.ma.masked_equal(cdf, 0)
    cdf_o = (cdf_m - cdf_m.min()) * 255 / (cdf_m.max() - cdf_m.min())
    cdf = np.ma.filled(cdf_o, 0).astype('uint8')
    img3 = cdf[img]
    cv2.imwrite(tile.filename, img3)
    tile.image = Image.open(tile.filename)
image = join(tiles)
image.save('watch-join.png')
I reviewed the actual algorithm and came up with the following implementation. I am sure there is a better way to do this; any suggestions are appreciated.
import numpy as np
import cv2

img = cv2.imread('watch.png', 0)
print(img)
img_size = img.shape
print(img_size)
img_mod = np.zeros((600, 800))
for i in range(0, img_size[0]-30):
    for j in range(0, img_size[1]-30):
        kernel = img[i:i+30, j:j+30]
        for k in range(0, 30):
            for l in range(0, 30):
                element = kernel[k, l]
                rank = 0
                for m in range(0, 30):
                    for n in range(0, 30):
                        if kernel[k, l] > kernel[m, n]:
                            rank = rank + 1
                img_mod[i, j] = (rank * 255) / 900
im = np.array(img_mod, dtype=np.uint8)
cv2.imwrite('target.png', im)
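As an aside (not from the original post): OpenCV ships a tiled, interpolated variant of this algorithm, CLAHE, which avoids both the blocky seams from the question and the cost of the nested loops above. A minimal sketch:
import cv2

img = cv2.imread('watch.png', 0)
# Equalizes the histogram inside each tile, then bilinearly interpolates
# between tiles so the seams do not show.
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
cv2.imwrite('target_clahe.png', clahe.apply(img))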