Lightning-AI · jingxu10 · May 26, 2023
@@ -93,8 +93,9 @@ This is essentially the same as running ``python path/to/your/script.py``, but i
  itself and are expected to be parsed there.
 
  Options:
- --accelerator [cpu|gpu|cuda|mps|tpu]
+ --accelerator [cpu|gpu|cuda|mps|tpu|xpu]
  The hardware accelerator to run on.
+ Install Lightning-XPU to enable ``xpu``.
  --strategy [ddp|dp|deepspeed] Strategy for how to run across multiple
  devices.
  --devices TEXT Number of devices to run on (``int``), which

@@ -17,6 +17,7 @@
  ../advanced/model_parallel
  Train on single or multiple GPUs <../accelerators/gpu>
  Train on single or multiple HPUs <../integrations/hpu/index>
+ Train on single or multiple XPUs <../integrations/xpu/index>
  Train on single or multiple TPUs <../accelerators/tpu>
  Train on MPS <../accelerators/mps>
  Use a pretrained model <../advanced/pretrained>
@@ -167,6 +168,13 @@ How-to Guides
  :col_css: col-md-4
  :height: 180
 
+.. displayitem::
+ :header: Train on single or multiple XPUs
+ :description: Train models faster with XPU accelerators
+ :button_link: ../integrations/xpu/index.html
+ :col_css: col-md-4
+ :height: 180
+
 .. displayitem::
  :header: Train on single or multiple TPUs
  :description: Train models faster with TPU accelerators

@@ -133,6 +133,13 @@ Customize and extend Lightning for things like custom hardware or distributed st
  :button_link: integrations/hpu/index.html
  :height: 100
 
+.. displayitem::
+ :header: Train on single or multiple XPUs
+ :description: Train models faster with XPUs.
+ :col_css: col-md-12
+ :button_link: integrations/xpu/index.html
+ :height: 100
+
 .. displayitem::
  :header: Train on single or multiple TPUs
  :description: Train models faster with TPUs.

@@ -93,6 +93,11 @@ def _load_py_module(name: str, location: str) -> ModuleType:
  target_dir="docs/source-pytorch/integrations/hpu",
  checkout="refs/tags/1.4.0",
 )
+assist_local.AssistantCLI.pull_docs_files(
+ gh_user_repo="Lightning-AI/lightning-XPU",
+ target_dir="docs/source-pytorch/integrations/xpu",
+ checkout="tags/1.0.0",
+)
 
 # Copy strategies docs as single pages
 assist_local.AssistantCLI.pull_docs_files(
@@ -355,6 +360,7 @@ def _load_py_module(name: str, location: str) -> ModuleType:
  "PIL": ("https://pillow.readthedocs.io/en/stable/", None),
  "torchmetrics": ("https://lightning.ai/docs/torchmetrics/stable/", None),
  "lightning_habana": ("https://lightning-ai.github.io/lightning-Habana/", None),
+ "intel-xpu": ("https://lightning-ai.github.io/lightning-XPU/", None),
  "tensorboardX": ("https://tensorboardx.readthedocs.io/en/stable/", None),
  # needed for referencing Fabric from lightning scope
  "lightning.fabric": ("https://lightning.ai/docs/fabric/stable/", None),

@@ -11,6 +11,7 @@ Currently there are accelerators for:
 - :doc:`GPU <../accelerators/gpu>`
 - :doc:`TPU <../accelerators/tpu>`
 - :doc:`HPU <../integrations/hpu/index>`
+- :doc:`XPU <../integrations/xpu/index>`
 - :doc:`MPS <../accelerators/mps>`
 
 The Accelerator is part of the Strategy which manages communication across multiple devices (distributed communication).
@@ -31,16 +32,16 @@ Create a Custom Accelerator
 .. warning:: This is an :ref:`experimental <versioning:Experimental API>` feature.
 
 Here is how you create a new Accelerator.
-Let's pretend we want to integrate the fictional XPU accelerator and we have access to its hardware through a library
-``xpulib``.
+Let's pretend we want to integrate the fictional YPU accelerator and we have access to its hardware through a library
+``ypulib``.
 
 .. code-block:: python
 
- import xpulib
+ import ypulib
 
 
- class XPUAccelerator(Accelerator):
- """Support for a hypothetical XPU, optimized for large-scale machine learning."""
+ class YPUAccelerator(Accelerator):
+ """Support for a hypothetical YPU, optimized for large-scale machine learning."""
 
  @staticmethod
  def parse_devices(devices: Any) -> Any:
@@ -51,29 +52,29 @@ Let's pretend we want to integrate the fictional XPU accelerator and we have acc
  @staticmethod
  def get_parallel_devices(devices: Any) -> Any:
  # Here, convert the device indices to actual device objects
- return [torch.device("xpu", idx) for idx in devices]
+ return [torch.device("ypu", idx) for idx in devices]
 
  @staticmethod
  def auto_device_count() -> int:
  # Return a value for auto-device selection when `Trainer(devices="auto")`
- return xpulib.available_devices()
+ return ypulib.available_devices()
 
  @staticmethod
  def is_available() -> bool:
- return xpulib.is_available()
+ return ypulib.is_available()
 
  def get_device_stats(self, device: Union[str, torch.device]) -> Dict[str, Any]:
  # Return optional device statistics for loggers
  return {}
 
 
-Finally, add the XPUAccelerator to the Trainer:
+Finally, add the YPUAccelerator to the Trainer:
 
 .. code-block:: python
 
  from lightning.pytorch import Trainer
 
- accelerator = XPUAccelerator()
+ accelerator = YPUAccelerator()
  trainer = Trainer(accelerator=accelerator, devices=2)
 
 
@@ -89,28 +90,28 @@ If you wish to switch to a custom accelerator from the CLI without code changes,
 
 .. code-block:: python
 
- class XPUAccelerator(Accelerator):
+ class YPUAccelerator(Accelerator):
  ...
 
  @classmethod
  def register_accelerators(cls, accelerator_registry):
  accelerator_registry.register(
- "xpu",
+ "ypu",
  cls,
- description=f"XPU Accelerator - optimized for large-scale machine learning.",
+ description=f"YPU Accelerator - optimized for large-scale machine learning.",
  )
 
 Now, this is possible:
 
 .. code-block:: python
 
- trainer = Trainer(accelerator="xpu")
+ trainer = Trainer(accelerator="ypu")
 
 Or if you are using the Lightning CLI, for example:
 
 .. code-block:: bash
 
- python train.py fit --trainer.accelerator=xpu --trainer.devices=2
+ python train.py fit --trainer.accelerator=ypu --trainer.devices=2
 
 
 ----------

@@ -21,6 +21,7 @@
  GPU <../accelerators/gpu>
  Half precision <../common/precision>
  HPU <../integrations/hpu/index>
+ XPU <../integrations/xpu/index>
  Inference <../deploy/production_intermediate>
  Lightning CLI <../cli/lightning_cli>
  LightningDataModule <../data/datamodule>
@@ -186,6 +187,13 @@ Glossary
  :button_link: ../integrations/hpu/index.html
  :height: 100
 
+.. displayitem::
+ :header: XPU
+ :description: Intel® Graphics Cards for faster training
+ :col_css: col-md-12
+ :button_link: ../integrations/xpu/index.html
+ :height: 100
+
 .. displayitem::
  :header: Inference
  :description: Making predictions by applying a trained model to unlabeled examples

@@ -0,0 +1,40 @@
+.. _xpu:
+
+Accelerator: XPU training
+=========================
+
+.. raw:: html
+
+ <div class="display-card-container">
+ <div class="row">
+
+.. Add callout items below this line
+
+.. displayitem::
+ :header: Basic
+ :description: Learn the basics of single and multi-XPU core training.
+ :col_css: col-md-4
+ :button_link: basic.html
+ :height: 150
+ :tag: basic
+
+.. displayitem::
+ :header: Intermediate
+ :description: Enable state-of-the-art scaling with advanced mixed-precision settings.
+ :col_css: col-md-4
+ :button_link: intermediate.html
+ :height: 150
+ :tag: intermediate
+
+.. displayitem::
+ :header: Advanced
+ :description: Explore state-of-the-art scaling with additional advanced configurations.
+ :col_css: col-md-4
+ :button_link: advanced.html
+ :height: 150
+ :tag: advanced
+
+.. raw:: html
+
+ </div>
+ </div>
@@ -0,0 +1,37 @@
+:orphan:
+
+######################
+Level 19: Explore XPUs
+######################
+
+Explore Intel® Graphics Cards (XPU) for model scaling.
+
+----
+
+.. raw:: html
+
+ <div class="display-card-container">
+ <div class="row">
+
+.. Add callout items below this line
+
+.. displayitem::
+ :header: Train models on XPUs
+ :description: Learn the basics of single and multi-XPU core training.
+ :col_css: col-md-6
+ :button_link: ../integrations/xpu/basic.html
+ :height: 150
+ :tag: basic
+
+.. displayitem::
+ :header: Optimize models training on XPUs
+ :description: Enable state-of-the-art scaling with advanced mixed-precision settings.
+ :col_css: col-md-6
+ :button_link: ../integrations/xpu/intermediate.html
+ :height: 150
+ :tag: intermediate
+
+.. raw:: html
+
+ </div>
+ </div>
@@ -0,0 +1,69 @@
+# Copyright The Lightning AI team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+from lightning.pytorch import LightningModule
+from lightning.pytorch.cli import LightningCLI
+from lightning.pytorch.demos.mnist_datamodule import MNISTDataModule
+from torch.nn import functional as F
+
+
+class LitClassifier(LightningModule):
+    """Single-linear-layer classifier paired with ``MNISTDataModule`` in this example."""
+
+    def __init__(self):
+        super().__init__()
+        # One linear layer: flattened 28 * 28 input -> 10 outputs.
+        self.l1 = torch.nn.Linear(28 * 28, 10)
+
+    def forward(self, x):
+        """Flatten each sample, apply the linear layer, then ReLU."""
+        flattened = x.view(x.size(0), -1)
+        return torch.relu(self.l1(flattened))
+
+    def training_step(self, batch, batch_idx):
+        """Return the cross-entropy loss for one training batch."""
+        inputs, targets = batch
+        outputs = self(inputs)
+        return F.cross_entropy(outputs, targets)
+
+    def validation_step(self, batch, batch_idx):
+        """Log the batch accuracy under ``val_acc``."""
+        inputs, targets = batch
+        outputs = self(inputs)
+        self.log("val_acc", self.accuracy(outputs, targets))
+
+    def test_step(self, batch, batch_idx):
+        """Log the batch accuracy under ``test_acc``."""
+        inputs, targets = batch
+        outputs = self(inputs)
+        self.log("test_acc", self.accuracy(outputs, targets))
+
+    @staticmethod
+    def accuracy(logits, y):
+        """Return the fraction of entries whose argmax over the last dim equals ``y``."""
+        hits = torch.argmax(logits, -1) == y
+        return hits.to(torch.float32).sum() / len(y)
+
+    def configure_optimizers(self):
+        """Return an Adam optimizer over all parameters with ``lr=0.02``."""
+        return torch.optim.Adam(self.parameters(), lr=0.02)
+
+
+if __name__ == "__main__":
+    # Trainer defaults handed to LightningCLI.
+    # NOTE(review): this file is added by the XPU change but defaults to "gpu" —
+    # confirm whether "xpu" was intended.
+    trainer_defaults = {
+        "accelerator": "gpu",
+        "devices": 2,
+        "max_epochs": 1,
+    }
+
+    # run=False: fit/validate/test are invoked explicitly below.
+    cli = LightningCLI(
+        LitClassifier,
+        MNISTDataModule,
+        trainer_defaults=trainer_defaults,
+        run=False,
+        save_config_kwargs={"overwrite": True},
+    )
+
+    # Run the model ⚡
+    model, datamodule = cli.model, cli.datamodule
+    cli.trainer.fit(model, datamodule=datamodule)
+    cli.trainer.validate(model, datamodule=datamodule)
+    cli.trainer.test(model, datamodule=datamodule)
@@ -1,2 +1,5 @@
 # validation accelerator connectors
 lightning-habana >=1.2.0, <1.3.0
+
+# validation XPU connectors
+lightning-xpu >=0.1.0
@@ -22,3 +22,10 @@
 
 ACCELERATOR_REGISTRY = _AcceleratorRegistry()
 _register_classes(ACCELERATOR_REGISTRY, "register_accelerators", sys.modules[__name__], Accelerator)
+
+from lightning.fabric.utilities.imports import _lightning_xpu_available
+
+# Register the XPU accelerator from the optional ``lightning_xpu`` package, unless an
+# "xpu" entry is already present in the registry.
+# NOTE(review): `_lightning_xpu_available` presumably reports whether the package is importable — confirm.
+if _lightning_xpu_available() and "xpu" not in ACCELERATOR_REGISTRY:
+ from lightning_xpu.fabric import XPUAccelerator
+
+ XPUAccelerator.register_accelerators(ACCELERATOR_REGISTRY)
@@ -29,14 +29,17 @@
 from lightning.fabric.utilities.consolidate_checkpoint import _process_cli_args
 from lightning.fabric.utilities.device_parser import _parse_gpu_ids
 from lightning.fabric.utilities.distributed import _suggested_max_num_threads
+from lightning.fabric.utilities.imports import _lightning_xpu_available
 from lightning.fabric.utilities.load import _load_distributed_checkpoint
 
 _log = logging.getLogger(__name__)
 
 _CLICK_AVAILABLE = RequirementCache("click")
 _LIGHTNING_SDK_AVAILABLE = RequirementCache("lightning_sdk")
 
-_SUPPORTED_ACCELERATORS = ("cpu", "gpu", "cuda", "mps", "tpu")
+_SUPPORTED_ACCELERATORS = ["cpu", "gpu", "cuda", "mps", "tpu"]
+if _lightning_xpu_available():
+ _SUPPORTED_ACCELERATORS.append("xpu")
 
 
 def _get_supported_strategies() -> List[str]:
@@ -209,13 +212,17 @@ def _set_env_variables(args: Namespace) -> None:
 def _get_num_processes(accelerator: str, devices: str) -> int:
  """Parse the `devices` argument to determine how many processes need to be launched on the current machine."""
  if accelerator == "gpu":
- parsed_devices = _parse_gpu_ids(devices, include_cuda=True, include_mps=True)
+ parsed_devices = _parse_gpu_ids(devices, include_cuda=True, include_mps=True, include_xpu=True)
  elif accelerator == "cuda":
  parsed_devices = CUDAAccelerator.parse_devices(devices)
  elif accelerator == "mps":
  parsed_devices = MPSAccelerator.parse_devices(devices)
  elif accelerator == "tpu":
  raise ValueError("Launching processes for TPU through the CLI is not supported.")
+ elif accelerator == "xpu":
+ from lightning_xpu.fabric import XPUAccelerator
+
+ parsed_devices = XPUAccelerator.parse_devices(devices)
  else:
  return CPUAccelerator.parse_devices(devices)
  return len(parsed_devices) if parsed_devices is not None else 0