Skip to content

Commit

Permalink
update docker + readme
Browse files Browse the repository at this point in the history
  • Loading branch information
sdtblck committed Oct 17, 2021
1 parent a077464 commit 1eff7a2
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 9 deletions.
8 changes: 7 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ RUN apt-get update -y && \
git python3 python3-dev libpython3-dev python3-pip sudo pdsh \
htop llvm-9-dev tmux zstd software-properties-common build-essential autotools-dev \
nfs-common pdsh cmake g++ gcc curl wget vim less unzip htop iftop iotop ca-certificates ssh \
rsync iputils-ping net-tools libcupti-dev && \
rsync iputils-ping net-tools libcupti-dev nano && \
update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \
update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \
pip install --upgrade pip && \
Expand All @@ -29,6 +29,7 @@ RUN echo 'password' >> password.txt && \
echo "Set disable_coredump false" >> /etc/sudo.conf && \
# Clean up
rm password.txt

# Expose SSH port
EXPOSE 22

Expand All @@ -46,9 +47,11 @@ RUN mkdir -p /build && \
test -f /usr/local/mpi/bin/mpic++ && \
cd ~ && \
rm -rf /build

# Needs to be in docker PATH if compiling other items & bashrc PATH (later)
ENV PATH=/usr/local/mpi/bin:${PATH} \
LD_LIBRARY_PATH=/usr/local/lib:/usr/local/mpi/lib:/usr/local/mpi/lib64:${LD_LIBRARY_PATH}

# Create a wrapper for OpenMPI to allow running as root by default
RUN mv /usr/local/mpi/bin/mpirun /usr/local/mpi/bin/mpirun.real && \
echo '#!/bin/bash' > /usr/local/mpi/bin/mpirun && \
Expand Down Expand Up @@ -76,6 +79,9 @@ COPY requirements/requirements-onebitadam.txt .
COPY requirements/requirements-sparseattention.txt .
RUN pip install -r requirements.txt && pip install -r requirements-onebitadam.txt && pip install -r requirements-sparseattention.txt && pip cache purge

# install fused kernels
RUN sudo python setup.py install

## Install APEX
RUN pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" git+https://github.com/NVIDIA/apex.git@a651e2c24ecf97cbf367fd3f330df36760e1c597

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ Coming soon: a colab notebook for trying out the model.
First make sure you are in an environment with Python 3.8 or later and `torch>=1.8` installed. Then run `pip install -r requirements/requirements.txt`.
You may need to change the version of `cupy-cudaxxx` to match your machine's CUDA version.

Some features rely on apex, which you can install with the command below:
NVIDIA's apex is an optional extra (used only for FusedAdam, which may offer some performance improvement):

```bash
pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" git+https://github.com/NVIDIA/apex.git@e2083df5eb96643c61613b9df48dd4eea6b07690
Expand Down
15 changes: 8 additions & 7 deletions megatron/neox_arguments/neox_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ class NeoXArgsModel(NeoXArgsTemplate):
Maximum number of position embeddings to use. This is the size of position embedding.
"""

norm: Literal["layernorm", "rmsnorm", "scalenorm", "apexlayernorm"] = "layernorm"
norm: Literal["layernorm", "rmsnorm", "scalenorm"] = "layernorm"
"""
Normalization layer to use. Choose from "layernorm", "rmsnorm", "scalenorm", "apexlayernorm".
Normalization layer to use. Choose from "layernorm", "rmsnorm", "scalenorm".
"""

layernorm_epsilon: float = 1.0e-5
Expand All @@ -124,7 +124,9 @@ class NeoXArgsModel(NeoXArgsTemplate):
Scalenorm epsilon
"""

pos_emb: Literal['learned', 'rotary', 'sinusoidal', 'rpe', 'alibi', 'none'] = "learned"
pos_emb: Literal[
"learned", "rotary", "sinusoidal", "rpe", "alibi", "none"
] = "learned"
"""
Type of positional embedding to use - choose from 'learned', 'rotary', 'sinusoidal', 'rpe', 'alibi', 'none'
"""
Expand Down Expand Up @@ -205,7 +207,7 @@ class NeoXArgsModel(NeoXArgsTemplate):
Pad the vocab size to be divisible by this value. This is added for computational efficiency reasons.
"""

activation : Literal["gelu", "geglu", "relu", "softsign", "swish", "mish"] = "gelu"
activation: Literal["gelu", "geglu", "relu", "softsign", "swish", "mish"] = "gelu"
"""
Activation function to use - choose from ["gelu", "geglu", "relu", "softsign", "swish", "mish"]
"""
Expand Down Expand Up @@ -301,7 +303,7 @@ class NeoXArgsModel(NeoXArgsTemplate):
If None - gmlp model doesn't use attention.
"""

gpt_j_residual : bool = False
gpt_j_residual: bool = False
"""
If false, we use the conventional residual path:
x = x + attn(ln1(x))
Expand All @@ -310,7 +312,7 @@ class NeoXArgsModel(NeoXArgsTemplate):
x = ln(x)
x = x + attn(x) + mlp(x)
"""

soft_prompt_tuning: dict = None
"""
Dictionary configuring the soft prompt tuning parameters.
Expand Down Expand Up @@ -487,7 +489,6 @@ class NeoXArgsLogging(NeoXArgsTemplate):
"""



@dataclass
class NeoXArgsOther(NeoXArgsTemplate):
"""
Expand Down

0 comments on commit 1eff7a2

Please sign in to comment.