diff --git a/doc/requirements-doc.txt b/doc/requirements-doc.txt
index 05156ea26c2a56..8ea8d767da4e98 100644
--- a/doc/requirements-doc.txt
+++ b/doc/requirements-doc.txt
@@ -5,6 +5,7 @@ accelerate>=0.17.0
 click
 colorama
 colorful
+datasets
 # Newer versions of fairscale do not support Python 3.6 even though they still have wheels for it.
 # Have to manually pin it: https://github.com/facebookresearch/fairscale/issues/962
 fairscale; python_version >= '3.7'
diff --git a/python/ray/data/read_api.py b/python/ray/data/read_api.py
index 384c8998d60e3f..3cf4649c61fea1 100644
--- a/python/ray/data/read_api.py
+++ b/python/ray/data/read_api.py
@@ -267,11 +267,12 @@ def range_tensor(n: int, *, shape: Tuple = (1,), parallelism: int = -1) -> Datas
     Examples:
         >>> import ray
         >>> ds = ray.data.range_tensor(1000, shape=(2, 2))
-        >>> ds # doctest: +ellipsis
+        >>> ds # doctest: +ELLIPSIS
         Dataset(
-            num_blocks=...,
-            num_rows=1000,
-            schema={data: numpy.ndarray(shape=(2, 2), dtype=int64)})
+           num_blocks=...,
+           num_rows=1000,
+           schema={data: numpy.ndarray(shape=(2, 2), dtype=int64)}
+        )
         >>> ds.map_batches(lambda arr: arr * 2).take(2) # doctest: +SKIP
         [array([[0, 0],
                [0, 0]]),
@@ -855,8 +856,8 @@ def read_json(
     from file paths. If your data adheres to a different partitioning scheme, set
     the ``partitioning`` parameter.

-    >>> ds = ray.data.read_json("example://year=2022/month=09/sales.json") # doctest: + SKIP
-    >>> ds.take(1) # doctest: + SKIP
+    >>> ds = ray.data.read_json("example://year=2022/month=09/sales.json") # doctest: +SKIP
+    >>> ds.take(1) # doctest: +SKIP
     [{'order_number': 10107, 'quantity': 30, 'year': '2022', 'month': '09'}]

     Args:
@@ -950,8 +951,8 @@ def read_csv(
     from file paths. If your data adheres to a different partitioning scheme, set
     the ``partitioning`` parameter.

-    >>> ds = ray.data.read_csv("example://year=2022/month=09/sales.csv") # doctest: + SKIP
-    >>> ds.take(1) # doctest: + SKIP
+    >>> ds = ray.data.read_csv("example://year=2022/month=09/sales.csv") # doctest: +SKIP
+    >>> ds.take(1) # doctest: +SKIP
     [{'order_number': 10107, 'quantity': 30, 'year': '2022', 'month': '09'}]

     By default, ``read_csv`` reads all files from file paths. If you want to filter
@@ -1772,20 +1773,52 @@ def from_spark(

 @PublicAPI
 def from_huggingface(
     dataset: Union["datasets.Dataset", "datasets.DatasetDict"],
-) -> Union[MaterializedDataset]:
+) -> Union[MaterializedDataset, Dict[str, MaterializedDataset]]:
     """Create a dataset from a Hugging Face Datasets Dataset.

     This function is not parallelized, and is intended to be used
     with Hugging Face Datasets that are loaded into memory (as opposed
     to memory-mapped).

+    Example:
+
+        .. doctest::
+            :options: +ELLIPSIS
+
+            >>> import ray
+            >>> import datasets
+            >>> hf_dataset = datasets.load_dataset("tweet_eval", "emotion")
+            Downloading ...
+            >>> ray_ds = ray.data.from_huggingface(hf_dataset)
+            >>> ray_ds
+            {'train': MaterializedDataset(
+               num_blocks=1,
+               num_rows=3257,
+               schema={text: string, label: int64}
+            ), 'test': MaterializedDataset(
+               num_blocks=1,
+               num_rows=1421,
+               schema={text: string, label: int64}
+            ), 'validation': MaterializedDataset(
+               num_blocks=1,
+               num_rows=374,
+               schema={text: string, label: int64}
+            )}
+            >>> ray_ds = ray.data.from_huggingface(hf_dataset["train"])
+            >>> ray_ds
+            MaterializedDataset(
+               num_blocks=1,
+               num_rows=3257,
+               schema={text: string, label: int64}
+            )
+
     Args:
-        dataset: A Hugging Face ``Dataset``, or ``DatasetDict``.
-            ``IterableDataset`` is not supported.
+        dataset: A Hugging Face Dataset, or DatasetDict. IterableDataset is not
+            supported.

     Returns:
-        MaterializedDataset holding Arrow records from the Hugging Face Dataset, or a
-        dict of MaterializedDataset in case ``dataset`` is a ``DatasetDict``.
+        Dataset holding Arrow records from the Hugging Face Dataset, or a dict of
+        datasets in case dataset is a DatasetDict.
     """
     import datasets
@@ -1797,12 +1830,22 @@ def convert(ds: "datasets.Dataset") -> Dataset:
         return ray_ds

     if isinstance(dataset, datasets.DatasetDict):
+        available_keys = list(dataset.keys())
+        logger.warning(
+            "You provided a Huggingface DatasetDict which contains multiple "
+            "datasets. The output of `from_huggingface` is a dictionary of Ray "
+            "Datasets. To convert just a single Huggingface Dataset to a "
+            "Ray Dataset, specify a split. For example, "
+            "`ray.data.from_huggingface(my_dataset_dictionary"
+            f"['{available_keys[0]}'])`. "
+            f"Available splits are {available_keys}."
+        )
         return {k: convert(ds) for k, ds in dataset.items()}
     elif isinstance(dataset, datasets.Dataset):
         return convert(dataset)
     else:
         raise TypeError(
-            "`dataset` must be a `datasets.Dataset` or `datasets.DatasetDict`, "
+            "`dataset` must be a `datasets.Dataset` or `datasets.DatasetDict`. "
             f"got {type(dataset)}"
         )
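
A minimal usage sketch of the call patterns this patch documents (assuming `ray` and `datasets` are installed; the `tweet_eval`/`emotion` dataset and the 3257-row train split follow the docstring example above, and `load_dataset` downloads data on first use):

    import ray
    import datasets

    # A DatasetDict converts to a dict of MaterializedDataset keyed by split
    # name; with this change it also logs a warning suggesting a single split.
    hf_splits = datasets.load_dataset("tweet_eval", "emotion")
    ray_splits = ray.data.from_huggingface(hf_splits)
    print(sorted(ray_splits.keys()))  # ['test', 'train', 'validation']

    # Passing a single split returns one MaterializedDataset and no warning.
    train_ds = ray.data.from_huggingface(hf_splits["train"])
    print(train_ds.count())  # 3257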