Binary classification #292

goussous0 · 2024-07-01T11:12:57Z

I get an index error while trying to train a kan model on binary classification problem with labeled data

import torch
from dataset import load_custom_dataset
from kan import *

features = [
    "f1",
    "f2",
    "f3",
    "f4",
    "f5",
    "f6",
]

dataset = load_custom_dataset("6-5-2024.csv", features=features, target_column="category")


print("Train data shape: {}".format(dataset['train_input'].shape))
print("Train target shape: {}".format(dataset['train_label'].shape))
print("Test data shape: {}".format(dataset['test_input'].shape))
print("Test target shape: {}".format(dataset['test_label'].shape))
print("====================================")

if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

print(device)
image_folder = 'video_img'


model = KAN(width=[len(features), 5, 1], grid=5, k=3, seed=0, device=device)
model(dataset['train_input'])
model.plot(beta=100, scale=1, in_vars=features, out_vars=['signature'])


def train_acc():
    return torch.mean((torch.argmax(model(dataset['train_input']), dim=1) == dataset['train_label']).float())

def test_acc():
    return torch.mean((torch.argmax(model(dataset['test_input']), dim=1) == dataset['test_label']).float())


results = model.train(dataset, opt="Adam", device=device, metrics=(train_acc, test_acc),
                      loss_fn=torch.nn.CrossEntropyLoss(), steps=100, lamb=0.01, lamb_entropy=10., save_fig=True, img_folder=image_folder)


print (results['train_acc'][-1], results['test_acc'][-1])

and this is how i am loading the dataset

def load_custom_dataset(csv_file, features, target_column):
    # Load CSV file into a pandas DataFrame
    df = pd.read_csv(csv_file)

    # Extract features and target
    # data = df.drop(columns=[target_column]).values
    data = df[features].values
    target = df[target_column].values

    label_encoder = LabelEncoder()
    target = label_encoder.fit_transform(target)    

    # Convert to PyTorch tensors
    data_tensor = torch.tensor(data, dtype=torch.float32)
    target_tensor = torch.tensor(target, dtype=torch.uint8)

    # Split dataset into train and test sets
    train_data, test_data, train_target, test_target = train_test_split(data_tensor, target_tensor, test_size=0.2, random_state=42)

    # Create data loaders (optional, if you want to batch and shuffle the data)
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(train_data, train_target), batch_size=1, shuffle=True)
    test_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(test_data, test_target), batch_size=1, shuffle=False)

    # Prepare tensors for device (assuming 'device' is defined somewhere)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_inputs = torch.empty(0, train_data.shape[1], device=device)
    train_labels = torch.empty(0, dtype=torch.long, device=device)
    test_inputs = torch.empty(0, test_data.shape[1], device=device)
    test_labels = torch.empty(0, dtype=torch.long, device=device)

    # Concatenate all data into a single tensor on the specified device
    for data, labels in train_loader:
        train_inputs = torch.cat((train_inputs, data.to(device)), dim=0)
        train_labels = torch.cat((train_labels, labels.to(device)), dim=0)

    for data, labels in test_loader:
        test_inputs = torch.cat((test_inputs, data.to(device)), dim=0)
        test_labels = torch.cat((test_labels, labels.to(device)), dim=0)

    dataset = {}
    dataset['train_input'] = train_inputs
    dataset['test_input'] = test_inputs
    dataset['train_label'] = train_labels
    dataset['test_label'] = test_labels

    return dataset

And the error i get

Train data shape: torch.Size([86623, 7])
Train target shape: torch.Size([86623])
Test data shape: torch.Size([21656, 7])
Test target shape: torch.Size([21656])
====================================
cpu
description:   0%|                                                          | 0/100 [00:08<?, ?it/s]
Traceback (most recent call last):
  File "/home/test/Desktop/thing_detection/src/main.py", line 45, in <module>
    results = model.train(dataset, opt="Adam", device=device, metrics=(train_acc, test_acc),
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/test/Desktop/thing_detection/.venv/lib/python3.11/site-packages/kan/KAN.py", line 909, in train
    train_loss = loss_fn(pred, dataset['train_label'][train_id].to(device))
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/test/Desktop/thing_detection/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/test/Desktop/thing_detection/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/test/Desktop/thing_detection/.venv/lib/python3.11/site-packages/torch/nn/modules/loss.py", line 1185, in forward
    return F.cross_entropy(input, target, weight=self.weight,
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/test/Desktop/thing_detection/.venv/lib/python3.11/site-packages/torch/nn/functional.py", line 3086, in cross_entropy
    return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
IndexError: Target 1 is out of bounds.

Am I missing something?

The text was updated successfully, but these errors were encountered:

KindXiaoming · 2024-07-04T00:49:52Z

Hi, I guess the width should be width=[len(features), 5, 2] instead of width=[len(features), 5, 1] since crossentropy deals with logit (which has 2 dimensions because you have two classes).

goussous0 closed this as completed Jul 4, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Binary classification #292

Binary classification #292

goussous0 commented Jul 1, 2024

KindXiaoming commented Jul 4, 2024 •

edited

Loading

Binary classification #292

Binary classification #292

Comments

goussous0 commented Jul 1, 2024

KindXiaoming commented Jul 4, 2024 • edited Loading

KindXiaoming commented Jul 4, 2024 •

edited

Loading