A single image is first converted into multi-view images with a VAE; a NeRF model is then implemented to output the result as a 3D point cloud.
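At a glance, the data flow is: one 64x64 RGB image, a set of latent codes, ten decoded views, and finally a colored point cloud. The snippet below is only a shape-level sketch with dummy tensors (the sizes match the code that follows); the actual VAE and NeRF implementations come next.

import torch

# Dummy tensors illustrating the tensor shapes in this pipeline (illustration only)
num_views, z_dim = 10, 10
image = torch.rand(1, 3, 64, 64)            # the single input image after resizing
z = torch.randn(num_views, z_dim)           # one latent code per synthesized viewpoint
views = torch.rand(num_views, 3, 64, 64)    # multi-view images decoded by the VAE
points = torch.rand(10000, 3)               # 3D points sampled from the trained NeRF
colors = torch.rand(10000, 3)               # per-point RGB values
print(image.shape, z.shape, views.shape, points.shape, colors.shape)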
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
# Load the image
image_path = r"C:\Users\연준모\2. KPASS\ver2\images\1234.jpg"
input_image = Image.open(image_path).convert('RGB')
# Define the transformation
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])
# Apply the transformation
input_tensor = transform(input_image) # Shape should be [3, 64, 64]
# Data augmentation
aug_transform = transforms.Compose([
    transforms.RandomRotation(30),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])
# Create a dataset with repeated and augmented images
class SingleImageDataset(Dataset):
    def __init__(self, image_tensor, num_samples, transform):
        self.image_tensor = image_tensor
        self.num_samples = num_samples
        self.transform = transform

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        image = self.image_tensor.permute(1, 2, 0).numpy()
        image = Image.fromarray((image * 255).astype(np.uint8))
        image = self.transform(image)
        return image
# Create DataLoader
dataset = SingleImageDataset(input_tensor, num_samples=1000, transform=aug_transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
# Define VAE
class VAE(nn.Module):
    def __init__(self, img_channels=3, z_dim=10):
        super(VAE, self).__init__()
        # A 64x64 input is downsampled by 2 four times -> 4x4 feature maps
        self.encoder = nn.Sequential(
            nn.Conv2d(img_channels, 16, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 32, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(128 * 4 * 4, 128),
            nn.ReLU(),
            nn.Linear(128, 2 * z_dim)  # for mean and logvar
        )
        self.decoder = nn.Sequential(
            nn.Linear(z_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 128 * 4 * 4),
            nn.ReLU(),
            nn.Unflatten(1, (128, 4, 4)),
            nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, 3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 16, 3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(16, img_channels, 3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid()
        )

    def reparameterize(self, mu, logvar):
        # Sample z = mu + sigma * eps with eps ~ N(0, I) (reparameterization trick)
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        x = self.encoder(x)
        mu, logvar = x.chunk(2, dim=-1)
        z = self.reparameterize(mu, logvar)
        return self.decoder(z), mu, logvar
# Initialize the model
vae = VAE(img_channels=3, z_dim=10)
# Define loss function and optimizer
def loss_function(recon_x, x, mu, logvar):
    # Reconstruction term plus KL divergence between q(z|x) and N(0, I)
    BCE = nn.functional.binary_cross_entropy(recon_x, x, reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD
optimizer = optim.Adam(vae.parameters(), lr=1e-3)
# Training loop
epochs = 10
vae.train()
for epoch in range(epochs):
    for imgs in dataloader:
        imgs = imgs.to(torch.float32)  # Ensure correct dtype
        recon_imgs, mu, logvar = vae(imgs)
        loss = loss_function(recon_imgs, imgs, mu, logvar)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch}, Loss: {loss.item()}')
# Generate images from different viewpoints by manipulating the latent space
vae.eval()
with torch.no_grad():
    # Sample random vectors in the latent space (10 viewpoints)
    z = torch.randn(10, 10)
    # Decode each latent vector into an image from a different viewpoint
    for i in range(10):
        z_i = z[i].unsqueeze(0)  # single latent vector
        generated_img = vae.decoder(z_i)
        # Convert from the [0, 1] range to [0, 255]
        generated_img = generated_img * 255
        generated_img = generated_img.permute(0, 2, 3, 1)  # for visualization
        generated_img = generated_img.squeeze().type(torch.uint8)
        generated_img = Image.fromarray(generated_img.cpu().numpy())
        # Save (or display) the image
        generated_img.save(f'generated_view_{i}.png')
print("코드 실행은 일단 끝남")
Running the code above confirms that the multi-view images are generated as expected; these images serve as the input to the NeRF stage that follows.
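As a quick sanity check before the NeRF stage, the saved views can be reloaded and displayed. This is a minimal sketch, assuming the generated_view_*.png files written above are in the current working directory:

import matplotlib.pyplot as plt
from PIL import Image

# Display the ten generated views side by side (sanity check only)
fig, axes = plt.subplots(1, 10, figsize=(20, 2))
for i, ax in enumerate(axes):
    ax.imshow(Image.open(f'generated_view_{i}.png'))
    ax.axis('off')
plt.show()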
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import imageio.v2 as imageio
import os
import matplotlib.pyplot as plt
from torchvision import transforms
import trimesh
device = torch.device("cpu")  # Use CPU only; no GPU required
class NeRF(nn.Module):
    """Simplified NeRF MLP: maps a 6-D ray (origin + direction) to RGB and density."""
    def __init__(self):
        super(NeRF, self).__init__()
        self.layers = nn.ModuleList([nn.Linear(6, 256)])  # input size 6: ray origin + direction
        self.layers.extend([nn.Linear(256, 256) for _ in range(8)])
        self.rgb_layer = nn.Linear(256, 3)
        self.density_layer = nn.Linear(256, 1)

    def forward(self, x):
        for layer in self.layers:
            x = torch.relu(layer(x))
        rgb = torch.sigmoid(self.rgb_layer(x))
        density = torch.relu(self.density_layer(x))
        return torch.cat([rgb, density], dim=-1)
def load_images(image_dir):
    # Load the ten views produced by the VAE stage
    images = []
    for i in range(10):
        image_path = os.path.join(image_dir, f'generated_view_{i}.png')
        img = imageio.imread(image_path)
        images.append(img)
    return images
def preprocess_images(images):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((256, 256)),
    ])
    return [transform(img).unsqueeze(0) for img in images]
def generate_rays(h, w, K, c2w):
    # Pixel grid -> camera-space ray directions using the intrinsics K
    i, j = torch.meshgrid(torch.arange(h, dtype=torch.float32),
                          torch.arange(w, dtype=torch.float32), indexing='ij')
    dirs = torch.stack([(j - K[0, 2]) / K[0, 0], -(i - K[1, 2]) / K[1, 1], -torch.ones_like(i)], dim=-1)
    dirs = dirs.view(-1, 3)
    c2w = torch.tensor(c2w).float()  # convert the numpy pose matrix to a PyTorch tensor
    rays_d = torch.sum(dirs[..., np.newaxis, :] * c2w[:3, :3], -1)  # rotate ray directions into world space
    rays_o = c2w[:3, -1].expand(rays_d.shape)  # all rays share the camera origin
    return torch.cat([rays_o, rays_d], dim=-1)
def train_nerf(images, K, c2w_list):
    model = NeRF().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()
    num_epochs = 20  # number of training epochs
    h, w = images[0].shape[2], images[0].shape[3]
    for epoch in range(num_epochs):
        total_loss = 0
        for img, c2w in zip(images, c2w_list):
            img = img.to(device)
            rays = generate_rays(h, w, K, c2w).to(device)
            # Flatten the image to one RGB target per pixel, matching the ray ordering
            target = img.squeeze(0).permute(1, 2, 0).reshape(-1, 3)
            output = model(rays)
            output_rgb = output[:, :3]
            loss = criterion(output_rgb, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        if (epoch + 1) % 5 == 0:  # report every 5 epochs
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss:.4f}')
    visualize_images(images)
    return model  # return the trained model so the point cloud is extracted from it
def visualize_images(images):
    plt.figure(figsize=(10, 10))
    for i, img in enumerate(images):
        plt.subplot(1, len(images), i + 1)
        plt.imshow(img.squeeze().permute(1, 2, 0).numpy())
        plt.axis('off')
    plt.show()
def generate_point_cloud(model, num_points=10000):
    # Sample random 6-D rays and query the model; the first three components are used as 3D positions
    rays = torch.rand((num_points, 6)).to(device)
    with torch.no_grad():
        output = model(rays)
    rgb = output[:, :3].cpu().numpy()
    points = rays[:, :3].cpu().numpy()
    return points, rgb
def save_point_cloud(points, colors, filename='point_cloud.ply'):
    point_cloud = trimesh.PointCloud(vertices=points, colors=(colors * 255).astype(np.uint8))
    point_cloud.export(filename)
    print(f'Point cloud saved to {filename}')
def visualize_point_cloud(file_path):
    point_cloud = trimesh.load(file_path)
    points = point_cloud.vertices
    colors = point_cloud.colors / 255.0
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(points[:, 0], points[:, 1], points[:, 2], c=colors, s=1)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    plt.title('Point Cloud Visualization')
    plt.show()
def main():
    image_dir = r'C:\Users\연준모\2. KPASS\ver2'
    print("Starting code execution")
    images = load_images(image_dir)
    images = preprocess_images(images)
    # Example camera intrinsics and poses (these must be set for the actual data)
    K = np.array([[256, 0, 128], [0, 256, 128], [0, 0, 1]], dtype=np.float32)
    c2w_list = [np.eye(4) for _ in range(10)]  # camera pose for each image
    model = train_nerf(images, K, c2w_list)  # use the trained model returned by train_nerf
    points, colors = generate_point_cloud(model)
    point_cloud_path = os.path.join(image_dir, 'point_cloud.ply')
    save_point_cloud(points, colors, point_cloud_path)
    visualize_point_cloud(point_cloud_path)

if __name__ == "__main__":
    main()
This completes the NeRF code. It takes the multi-view images produced earlier by the VAE and reconstructs them into a 3D model, so the two stages together form a multi-model (VAE + NeRF) pipeline.
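To verify the final output, the exported point cloud can be reloaded and inspected. A minimal sketch, assuming the path below points at the file written by save_point_cloud and that trimesh is installed:

import trimesh

# Reload the exported point cloud and report its size (inspection only)
cloud = trimesh.load('point_cloud.ply')  # adjust the path if the file was saved elsewhere
print(f'{len(cloud.vertices)} points, color array shape: {cloud.colors.shape}')
# cloud.show()  # opens an interactive viewer if a display backend is available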