Skip to content

Commit

Permalink
fix small bug where sequence length is not passed into attention class (#21) (#23)
Browse files Browse the repository at this point in the history

* fix small bug where sequence length is not passed into attention class

* fix bug with the mask value under half-precision (fp16) dtypes, as well as masking in dense attention

* make sure deepspeed is installed with the pip sudo option (`./install.sh -s`)

This allows `gpt3small` to run but does not fix the problems with sparse attention. See #22

Co-authored-by: Phil Wang <[email protected]>
  • Loading branch information
StellaAthena and lucidrains committed Jan 1, 2021
1 parent 8b0b18a commit 3c7a44a
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 9 deletions.
13 changes: 5 additions & 8 deletions gpt_neox/gpt_neox.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@

from einops import rearrange

# constants

MASK_VALUE = -1e7

# helpers

def exists(val):
Expand Down Expand Up @@ -49,12 +45,12 @@ def forward(self, x, **kwargs):

# attention

def dense_attn(q, k, v, key_padding_mask = None, dropout_fn = None):
def dense_attn(q, k, v, attn_mask = None, dropout_fn = None):
scale = q.shape[-1] ** -0.5
sim = einsum('b h i d, b h j d -> b h i j', q, k) * scale

if exists(key_padding_mask):
sim = sim + key_padding_mask[:, None, :, :]
if exists(attn_mask):
sim = sim + attn_mask[None, None, :, :]

attn = sim.softmax(dim=-1)

Expand Down Expand Up @@ -103,7 +99,8 @@ def forward(self, x, **kwargs):
i, j = q.shape[-2], k.shape[-2]
bool_mask = torch.ones(i, j, device=device).triu_(j - i + 1).bool()
mask = torch.zeros(i, j, device=device).to(q)
mask.masked_fill_(bool_mask, MASK_VALUE)
mask_value = -torch.finfo(q.dtype).max
mask.masked_fill_(bool_mask, mask_value)

out = self.attn_fn(q, k, v, attn_mask=mask)
out = rearrange(out, 'b h n d -> b n (h d)')
Expand Down
2 changes: 1 addition & 1 deletion install_deepspeed.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
sudo apt-get -y install llvm-9-dev cmake
git clone https://github.com/microsoft/DeepSpeed.git /tmp/Deepspeed
cd /tmp/Deepspeed && DS_BUILD_SPARSE_ATTN=1 ./install.sh
cd /tmp/Deepspeed && DS_BUILD_SPARSE_ATTN=1 ./install.sh -s

0 comments on commit 3c7a44a

Please sign in to comment.