Transformer Example Update - AutoModelforSequenceClassification #2190

Merged · 8 commits · Sep 17, 2021
9 changes: 9 additions & 0 deletions examples/contrib/transformers/README.md
````diff
@@ -34,6 +34,15 @@ python main.py run
 ```
 If needed, please adjust the batch size for your GPU device with the `--batch_size` argument.
 
+The default model is `bert-base-uncased`. In case you need to change it, use the `--model` argument; for details on which models can be used, refer [here](https://huggingface.co/transformers/v3.0.2/model_doc/auto.html#automodelforsequenceclassification).
+
+Example:
+```bash
+# Using DistilBERT, which has 40% fewer parameters than bert-base-uncased
+python main.py run --model="distilbert-base-uncased"
+```
+
+
 For details on accepted arguments:
 
 ```bash
````
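For reference, the `--model` value is simply a Hugging Face checkpoint name; loading one for sequence classification with the `transformers` auto classes looks roughly like the sketch below. The `num_labels=1` single-output head is an assumption, not something stated in the README.

```python
# Rough sketch of what a --model checkpoint resolves to (illustrative, not the example's exact code).
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

model_name = "distilbert-base-uncased"  # any checkpoint usable with AutoModelForSequenceClassification
config = AutoConfig.from_pretrained(model_name, num_labels=1)  # assumed single-output head
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, config=config)
```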
27 changes: 11 additions & 16 deletions examples/contrib/transformers/dataset.py
```diff
@@ -11,24 +11,19 @@ def __init__(self, texts, labels, tokenizer, max_length):
     def __getitem__(self, idx):
         text = str(self.texts[idx])
         text = " ".join(text.split())
-        inputs = self.tokenizer.encode_plus(
-            text, None, add_special_tokens=True, max_length=self.max_length, truncation=True
+        inputs = self.tokenizer(
+            text,
+            None,
+            add_special_tokens=True,
+            max_length=self.max_length,
+            truncation=True,
+            padding="max_length",
+            return_tensors="pt",
         )
+        inputs = {k: v.type(torch.long).squeeze(0) for k, v in inputs.items()}

-        ids = inputs["input_ids"]
-        token_type_ids = inputs["token_type_ids"]
-        mask = inputs["attention_mask"]
-        padding_length = self.max_length - len(ids)
-
-        ids = ids + ([0] * padding_length)
-        mask = mask + ([0] * padding_length)
-        token_type_ids = token_type_ids + ([0] * padding_length)
-        return {
-            "input_ids": torch.tensor(ids, dtype=torch.long),
-            "attention_mask": torch.tensor(mask, dtype=torch.long),
-            "token_type_ids": torch.tensor(token_type_ids, dtype=torch.long),
-            "label": torch.tensor(self.labels[idx], dtype=torch.float),
-        }
+        labels_pt = torch.tensor(self.labels[idx], dtype=torch.float)
+        return inputs, labels_pt

     def __len__(self):
         return len(self.labels)
```
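With this change, `__getitem__` returns an `(inputs, labels)` pair, where `inputs` is the dictionary produced by the tokenizer (each value squeezed to a 1-D tensor of length `max_length`). A minimal sketch of how such a dataset batches under the default `DataLoader` collate; the class name `TransformersDataset` and the toy texts are assumptions for illustration:

```python
# Sketch only: TransformersDataset stands in for the dataset class whose __getitem__ is shown above.
from torch.utils.data import DataLoader
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
texts = ["a great movie", "a terrible movie"]  # toy data
labels = [1.0, 0.0]

dataset = TransformersDataset(texts, labels, tokenizer, max_length=256)
inputs, label = dataset[0]
print(inputs["input_ids"].shape)        # torch.Size([256]) after squeeze(0)

# The default collate_fn stacks each key of the inputs dict and the labels separately.
loader = DataLoader(dataset, batch_size=2)
batch_inputs, batch_labels = next(iter(loader))
print(batch_inputs["input_ids"].shape)  # torch.Size([2, 256])
print(batch_labels.shape)               # torch.Size([2])
```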
33 changes: 13 additions & 20 deletions examples/contrib/transformers/main.py
```diff
@@ -144,7 +144,7 @@ def run(
     dropout=0.3,
     n_fc=768,
     max_length=256,
-    batch_size=128,
+    batch_size=32,
     weight_decay=0.01,
     num_workers=4,
     num_epochs=3,
@@ -303,21 +303,17 @@ def create_trainer(model, optimizer, criterion, lr_scheduler, train_sampler, con

     def train_step(engine, batch):

-        input_ids = batch["input_ids"]
-        attention_mask = batch["attention_mask"]
-        token_type_ids = batch["token_type_ids"]
-        labels = batch["label"].view(-1, 1)
+        input_batch = batch[0]
+        labels = batch[1].view(-1, 1)

-        if input_ids.device != device:
-            input_ids = input_ids.to(device, non_blocking=True, dtype=torch.long)
-            attention_mask = attention_mask.to(device, non_blocking=True, dtype=torch.long)
-            token_type_ids = token_type_ids.to(device, non_blocking=True, dtype=torch.long)
+        if labels.device != device:
+            input_batch = {k: v.to(device, non_blocking=True, dtype=torch.long) for k, v in batch[0].items()}
             labels = labels.to(device, non_blocking=True, dtype=torch.float)

         model.train()

         with autocast(enabled=with_amp):
-            y_pred = model(input_ids, attention_mask, token_type_ids)
+            y_pred = model(input_batch)
             loss = criterion(y_pred, labels)

         optimizer.zero_grad()
@@ -372,19 +368,16 @@ def create_evaluator(model, metrics, config, tag="val"):
     @torch.no_grad()
     def evaluate_step(engine, batch):
         model.eval()
-        input_ids = batch["input_ids"]
-        attention_mask = batch["attention_mask"]
-        token_type_ids = batch["token_type_ids"]
-        labels = batch["label"].view(-1, 1)
-
-        if input_ids.device != device:
-            input_ids = input_ids.to(device, non_blocking=True, dtype=torch.long)
-            attention_mask = attention_mask.to(device, non_blocking=True, dtype=torch.long)
-            token_type_ids = token_type_ids.to(device, non_blocking=True, dtype=torch.long)
-
+        input_batch = batch[0]
+        labels = batch[1].view(-1, 1)
+
+        if labels.device != device:
+            input_batch = {k: v.to(device, non_blocking=True, dtype=torch.long) for k, v in batch[0].items()}
             labels = labels.to(device, non_blocking=True, dtype=torch.float)

         with autocast(enabled=with_amp):
-            output = model(input_ids, attention_mask, token_type_ids)
+            output = model(input_batch)
         return output, labels

     evaluator = Engine(evaluate_step)
```
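The step functions now receive each batch as an `(inputs_dict, labels)` tuple instead of a flat dictionary. A minimal sketch of the shared device-transfer logic, with `device` created locally for illustration; the surrounding trainer/evaluator wiring stays as in `main.py`:

```python
# Sketch of the new batch handling (names are illustrative, not the exact code in main.py).
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def prepare_batch(batch):
    # The DataLoader yields (inputs_dict, labels); labels become a column vector for the loss.
    inputs, labels = batch
    labels = labels.view(-1, 1)
    if labels.device != device:
        # Move every tokenizer tensor (input_ids, attention_mask, token_type_ids, ...) in one pass.
        inputs = {k: v.to(device, non_blocking=True, dtype=torch.long) for k, v in inputs.items()}
        labels = labels.to(device, non_blocking=True, dtype=torch.float)
    return inputs, labels

# Inside train_step / evaluate_step the model then takes the whole dict:
#   inputs, labels = prepare_batch(batch)
#   y_pred = model(inputs)
#   loss = criterion(y_pred, labels)
```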
23 changes: 13 additions & 10 deletions examples/contrib/transformers/model.py
```diff
@@ -1,19 +1,22 @@
 import torch.nn as nn
-from transformers import AutoModel
+from transformers import AutoConfig, AutoModelForSequenceClassification


 class TransformerModel(nn.Module):
     def __init__(self, model_name, model_dir, dropout, n_fc, n_classes):
         super(TransformerModel, self).__init__()
-        self.transformer = AutoModel.from_pretrained(model_name, cache_dir=model_dir)
-        self.drop = nn.Dropout(dropout)
-        self.classifier = nn.Linear(n_fc, n_classes)
+        self.config = AutoConfig.from_pretrained(
+            model_name,
+            num_labels=n_classes,
+            output_hidden_states=n_fc,
+            classifier_dropout=dropout,
+            output_attentions=True,
+        )
+        self.transformer = AutoModelForSequenceClassification.from_pretrained(
+            model_name, cache_dir=model_dir, config=self.config
+        )

-    def forward(self, ids, mask, token_type_ids):
+    def forward(self, inputs):
+        output = self.transformer(**inputs)["logits"]

-        hidden_output, pooled_output = self.transformer(
-            ids, attention_mask=mask, token_type_ids=token_type_ids, return_dict=False
-        )
-        pooled_output = self.drop(pooled_output)
-        output = self.classifier(pooled_output)
         return output
```
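After this change the model wraps a full sequence-classification head, and `forward` consumes the tokenizer's output dictionary directly, returning the logits. An illustrative usage sketch; the constructor values, the cache directory, and the single-output head are assumptions, not the example's exact configuration:

```python
# Illustrative usage of the updated TransformerModel (values are assumed, not taken from main.py).
import torch
from transformers import AutoTokenizer

model = TransformerModel(
    model_name="bert-base-uncased",
    model_dir="/tmp/transformers-cache",  # hypothetical cache directory
    dropout=0.3,
    n_fc=768,
    n_classes=1,
)
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
inputs = tokenizer("an example sentence", return_tensors="pt")

with torch.no_grad():
    logits = model(inputs)  # forward unpacks the dict and returns self.transformer(**inputs)["logits"]
print(logits.shape)         # torch.Size([1, 1]) with n_classes=1
```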