CUDA-QとPyTorchを組み合わせたハイブリッド量子ニューラルネットワークです。
両方のレイヤーがGPUで高速化されることで、最大限のパフォーマンスが得られます。
この例では、MNISTデータセットを使った2値分類を行います。
データはニューラルネットワークを通って量子回路へと流れ込み、その出力をもとに手書き数字を分類します。
原文のコードには一部そのままでは動作しない箇所があったため、修正を加えています。また、原文ではCPUで実行していましたが、今回はGPUで実行しました。
GPU: NVIDIA H100
!pip install matplotlib==3.8.4 torch==2.0.1+cu118 torchvision==0.15.2+cu118 scikit-learn==1.4.2 -q --extra-index-url https://download.pytorch.org/whl/cu118
# Import the relevant libraries
import cudaq
from cudaq import spin
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.autograd import Function
from torchvision import datasets, transforms
import torch.optim as optim
import torch.nn as nn
import torchvision
from sklearn.model_selection import train_test_split
# Seed both frameworks so repeated runs start from the same random state.
torch.manual_seed(22)
cudaq.set_random_seed(44)

# Run CUDA-Q and PyTorch on the GPU when PyTorch can see one, and fall back
# to the CPU simulator otherwise, instead of hard-coding a CUDA device.
if torch.cuda.is_available():
    cudaq.set_target("nvidia")
    device = torch.device("cuda:0")
else:
    cudaq.set_target("qpp-cpu")
    device = torch.device("cpu")
def prepare_data(target_digits, sample_count, test_size):
    """Load MNIST, keep two digit classes and split them into train/test sets.

    Args:
        target_digits (list): digits to classify; only the first two entries
            are used to filter the dataset.
        sample_count (int): total number of images to draw before splitting.
        test_size (float): percentage (0-100) of ``sample_count`` used as the
            test set; the remainder is the training set.

    Returns:
        tuple: ``(x_train, x_test, y_train, y_test)`` float tensors placed on
        the module-level ``device``. Labels are ``0.0`` for the smaller target
        digit and ``1.0`` otherwise.
    """
    # NOTE(review): the images are read straight from ``dataset.data`` below,
    # which appears to bypass this transform (raw pixel values are used) --
    # confirm whether normalisation was intended.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    dataset = datasets.MNIST("./data",
                             train=True,
                             download=True,
                             transform=transform)

    # Filter out the required labels.
    idx = ((dataset.targets == target_digits[0]) |
           (dataset.targets == target_digits[1]))
    dataset.data = dataset.data[idx]
    dataset.targets = dataset.targets[idx]

    # Select a random subset of at most sample_count images and add a
    # channel axis to the images.
    subset_indices = torch.randperm(dataset.data.size(0))[:sample_count]
    x = dataset.data[subset_indices].float().unsqueeze(1)
    y = dataset.targets[subset_indices].float()

    # Relabel the targets as a 0 or a 1.
    y = torch.where(y == min(target_digits), 0.0, 1.0)

    # train_test_split is given NumPy arrays, so the data stays on the host
    # until the split is done; moving it to the device first would only add
    # a device -> host -> device round trip.
    x_train, x_test, y_train, y_test = train_test_split(
        x.numpy(),
        y.numpy(),
        test_size=test_size / 100,
        shuffle=True,
        random_state=42)

    # Convert back to tensors and move each split to the target device once.
    return (torch.from_numpy(x_train).to(device),
            torch.from_numpy(x_test).to(device),
            torch.from_numpy(y_train).to(device),
            torch.from_numpy(y_test).to(device))
# Classical parameters (read as module-level globals by the code below).
sample_count = 1000 # Total number of images to use.
target_digits = [5, 6] # Hand written digits to classify.
test_size = 30 # Percentage of dataset to be used for testing.
classification_threshold = 0.5 # Classification boundary used to measure accuracy.
epochs = 1000 # Number of epochs to train for.
# Quantum parameters.
qubit_count = 1
hamiltonian = spin.z(0) # Measurement operator.
shift = torch.tensor(torch.pi / 2) # Magnitude of parameter shift.
# Build the train/test tensors from the settings above.
x_train, x_test, y_train, y_test = prepare_data(target_digits, sample_count,
test_size)
# Plot some images from the training set to visualise.
# `device` is a torch.device, so comparing it with the string 'cpu' is not a
# reliable CPU check. Tensor.cpu() is valid on any device, so the branch is
# unnecessary: always bring the samples to host memory before plotting.
sample_to_plot = x_train[:10].cpu()
grid_img = torchvision.utils.make_grid(sample_to_plot,
                                       nrow=5,
                                       padding=3,
                                       normalize=True)
# make_grid output is permuted so the channel axis comes last for imshow.
plt.imshow(grid_img.permute(1, 2, 0))
plt.axis('off')
plt.show()
class QuantumFunction(Function):
    """Bridge between a CUDA-Q circuit and PyTorch autograd.

    An instance holds the parameterised kernel and the observable and can
    evaluate expectation values for a batch of parameters (``run``). The
    static ``forward``/``backward`` pair feeds data through the circuit and
    computes gradients of the variational parameters with the
    parameter-shift rule.
    """

    def __init__(self, qubit_count: int, hamiltonian: cudaq.SpinOperator):
        """Define the quantum circuit in CUDA Quantum"""

        @cudaq.kernel
        def kernel(qubit_count: int, thetas: np.ndarray):
            qubits = cudaq.qvector(qubit_count)
            ry(thetas[0], qubits[0])
            rx(thetas[1], qubits[0])

        self.kernel = kernel
        self.qubit_count = qubit_count
        self.hamiltonian = hamiltonian

    def run(self, theta_vals: torch.tensor) -> torch.tensor:
        """Execute the quantum circuit to output an expectation value.

        Args:
            theta_vals: batch of circuit parameters, one row per sample.

        Returns:
            1-D tensor with one expectation value per row of ``theta_vals``,
            created with the dtype and device of ``theta_vals``.
        """
        # If running on GPU, thetas is a torch.tensor that will live on GPU
        # memory. The observe function calls a .tolist() method on inputs
        # which moves thetas from GPU to CPU.
        qubit_count = [self.qubit_count] * theta_vals.shape[0]
        results = cudaq.observe(self.kernel, self.hamiltonian, qubit_count,
                                theta_vals)
        exp_vals = [result.expectation() for result in results]
        # Follow the input's device/dtype instead of a module-level global.
        return torch.tensor(exp_vals,
                            dtype=theta_vals.dtype,
                            device=theta_vals.device)

    @staticmethod
    def forward(ctx, thetas: torch.tensor, quantum_circuit,
                shift) -> torch.tensor:
        """Evaluate the circuit for every row of ``thetas``.

        Returns a column tensor (``reshape(-1, 1)``) of expectation values.
        """
        # Save shift and quantum_circuit in context to use in backward.
        ctx.shift = shift
        ctx.quantum_circuit = quantum_circuit

        # Calculate expectation value.
        exp_vals = ctx.quantum_circuit.run(thetas).reshape(-1, 1)

        ctx.save_for_backward(thetas, exp_vals)

        return exp_vals

    @staticmethod
    def backward(ctx, grad_output):
        """Backward pass computation via the parameter-shift rule.

        For rotation gates such as ``rx``/``ry`` the derivative of the
        expectation value is ``(f(t + s) - f(t - s)) / (2 * sin(s))``. For
        ``s = pi / 2`` the divisor is 2; dividing by ``2 * s`` (as a plain
        central difference would) is only accurate for small shifts and
        under-scales the gradient by ``2 / pi`` at ``s = pi / 2``.
        A shift with ``sin(s) == 0`` is not usable.

        Returns:
            Gradient with respect to ``thetas`` and ``None`` for the two
            non-tensor inputs of ``forward``.
        """
        thetas, _ = ctx.saved_tensors

        # Plain Python float so the divisor works with tensors on any device.
        denominator = 2.0 * float(
            torch.sin(torch.as_tensor(ctx.shift, dtype=torch.float64)))

        gradients = torch.zeros_like(thetas)

        # Shift one parameter column at a time, keeping the others fixed.
        for i in range(thetas.shape[1]):
            thetas_plus = thetas.clone()
            thetas_plus[:, i] += ctx.shift
            exp_vals_plus = ctx.quantum_circuit.run(thetas_plus)

            thetas_minus = thetas.clone()
            thetas_minus[:, i] -= ctx.shift
            exp_vals_minus = ctx.quantum_circuit.run(thetas_minus)

            gradients[:, i] = (exp_vals_plus - exp_vals_minus) / denominator

        # Chain rule: scale by the upstream gradient (broadcast over columns).
        gradients = torch.mul(grad_output, gradients)

        return gradients, None, None
class QuantumLayer(nn.Module):
    """PyTorch module wrapper that exposes the quantum circuit as a layer."""

    def __init__(self, qubit_count: int, hamiltonian, shift: torch.tensor):
        super().__init__()
        # Shift handed to QuantumFunction on every forward call.
        self.shift = shift
        # Circuit/observable holder evaluated by QuantumFunction.
        self.quantum_circuit = QuantumFunction(qubit_count, hamiltonian)

    def forward(self, input):
        """Send ``input`` through the circuit via the autograd function."""
        return QuantumFunction.apply(input, self.quantum_circuit, self.shift)
class Hybrid_QNN(nn.Module):
    """Hybrid network: classical fully connected layers followed by a quantum layer.

    The quantum layer is configured from the module-level ``qubit_count``,
    ``hamiltonian`` and ``shift`` settings.
    """

    def __init__(self):
        super().__init__()

        self.fc1 = nn.Linear(28 * 28, 256)
        self.fc2 = nn.Linear(256, 128)
        # A single dropout module is reused at both places in forward();
        # the original assigned the same attribute twice.
        self.dropout = nn.Dropout(0.25)

        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, 2)

        # The 2 outputs from PyTorch fc5 layer feed into the 2 variational
        # gates in the quantum circuit.
        self.quantum = QuantumLayer(qubit_count, hamiltonian, shift)

    def forward(self, x):
        """Return one sigmoid output per input image as a flat tensor."""
        x = x.view(-1, 28 * 28)  # Turns images into vectors.

        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.dropout(x)

        x = torch.relu(self.fc3(x))
        x = torch.relu(self.fc4(x))
        x = torch.relu(self.fc5(x))
        x = self.dropout(x)

        # Quantum circuit outputs an expectation value which is fed into the
        # sigmoid activation function to perform classification.
        x = torch.sigmoid(self.quantum(x))

        return x.view(-1)
def accuracy_score(y, y_hat, threshold=None):
    """Return the fraction of predictions that match the labels.

    Args:
        y: tensor of 0/1 labels.
        y_hat: tensor of predicted scores, same length as ``y``.
        threshold: score at or above which a prediction counts as class 1.
            Defaults to the module-level ``classification_threshold``.

    Returns:
        0-d tensor holding the accuracy. An empty ``y`` yields ``nan``.
    """
    if threshold is None:
        threshold = classification_threshold
    predictions = y_hat >= threshold
    # Reduce with Tensor.sum() rather than the built-in sum(), which walks
    # the tensor one element at a time in Python.
    return (y == predictions).sum() / len(y)
hybrid_model = Hybrid_QNN().to(device)

optimizer = optim.Adadelta(hybrid_model.parameters(),
                           lr=0.001,
                           weight_decay=0.8)

loss_function = nn.BCELoss().to(device)

# Per-epoch history, stored as plain Python floats so it can be plotted
# regardless of the device the tensors live on.
training_cost = []
testing_cost = []
training_accuracy = []
testing_accuracy = []

for epoch in range(epochs):
    # Switch back to training mode every epoch: the evaluation step at the
    # end of the loop puts the model in eval mode, which would otherwise
    # leave dropout disabled for every epoch after the first.
    hybrid_model.train()

    optimizer.zero_grad()

    y_hat_train = hybrid_model(x_train)

    train_cost = loss_function(y_hat_train, y_train)

    train_cost.backward()

    optimizer.step()

    training_accuracy.append(float(accuracy_score(y_train, y_hat_train)))
    training_cost.append(train_cost.item())

    # Evaluate on the held-out set without dropout and without gradients.
    hybrid_model.eval()
    with torch.no_grad():

        y_hat_test = hybrid_model(x_test)

        test_cost = loss_function(y_hat_test, y_test)

        testing_accuracy.append(float(accuracy_score(y_test, y_hat_test)))
        testing_cost.append(test_cost.item())
# The accuracy history may hold 0-d tensors (possibly on the GPU), which
# matplotlib cannot convert to NumPy; turn every entry into a float first.
training_accuracy_values = [float(value) for value in training_accuracy]
testing_accuracy_values = [float(value) for value in testing_accuracy]

plt.figure(figsize=(10, 5))

# Left panel: cost per epoch.
plt.subplot(1, 2, 1)
plt.plot(training_cost, label='Train')
plt.plot(testing_cost, label='Test')
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.legend()

# Right panel: accuracy per epoch.
plt.subplot(1, 2, 2)
plt.plot(training_accuracy_values, label='Train')
plt.plot(testing_accuracy_values, label='Test')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()
plt.show()
✅ 学習時間: 1272.10 秒 (1エポック平均: 1.2721 秒)
🧠 推論時間: 0.0858 秒 (1サンプルあたり: 0.2860 ms)