ray-project · amogkam · Jul 7, 2023 · Jun 30, 2023 · Jun 30, 2023 · Jun 30, 2023
@@ -227,6 +227,9 @@ Partitioning API
  datasource.PathPartitionEncoder
  datasource.PathPartitionParser
  datasource.PathPartitionFilter
+ datasource.FileExtensionFilter
+
+.. _metadata_provider:
 
 MetadataProvider API
 --------------------
@@ -240,3 +243,16 @@ MetadataProvider API
  datasource.DefaultFileMetadataProvider
  datasource.DefaultParquetMetadataProvider
  datasource.FastFileMetadataProvider
+
+
+.. _block_write_path_provider:
+
+BlockWritePathProvider API
+--------------------------
+
+.. autosummary::
+ :toctree: doc/
+
+ datasource.BlockWritePathProvider
+ datasource.DefaultBlockWritePathProvider
+
@@ -18,6 +18,8 @@ If your transformation isn't vectorized, there's no performance benefit.
 Optimizing reads
 ----------------
 
+.. _read_parallelism:
+
 Tuning read parallelism
 ~~~~~~~~~~~~~~~~~~~~~~~
 

@@ -0,0 +1,2 @@
+a,b
+0,1
@@ -0,0 +1,2 @@
+order_number,quantity
+10107,30
@@ -0,0 +1,4 @@
+{
+ "order_number": 10107,
+ "quantity": 30
+}
@@ -494,9 +494,6 @@ def test_convert_types(ray_start_regular_shared):
 def test_from_items(ray_start_regular_shared):
  ds = ray.data.from_items(["hello", "world"])
  assert extract_values("item", ds.take()) == ["hello", "world"]
-
- ds = ray.data.from_items([{"hello": "world"}], output_arrow_format=True)
- assert ds.take() == [{"hello": "world"}]
  assert isinstance(next(ds.iter_batches(batch_format=None)), pa.Table)
 
 

@@ -71,9 +71,7 @@ def write(self, sample_batch: SampleBatchType):
  # Todo: We should flush at the end of sampling even if this
  # condition was not reached.
  if len(self.samples) >= self.max_num_samples_per_file:
- ds = data.from_items(self.samples, output_arrow_format=True).repartition(
- num_blocks=1, shuffle=False
- )
+ ds = data.from_items(self.samples).repartition(num_blocks=1, shuffle=False)
  if self.format == "json":
  ds.write_json(self.path, try_create_dir=True)
  elif self.format == "parquet":