MolSSI · Lnaden · Aug 1, 2023 · Aug 3, 2023 · Aug 4, 2023 · Aug 4, 2023
diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
@@ -13,7 +13,6 @@ jobs:
  fail-fast: true
  matrix:
  python-version: ["3.7", "3.9", "3.11"]
- pydantic-version: ["1", "2"]
 
  steps:
  - name: Set up Python ${{ matrix.python-version }}
@@ -24,15 +23,6 @@ jobs:
  uses: actions/checkout@v3
  - name: Install poetry
  run: pip install poetry
- # Force pydantic 1.0 by modifying poetry dep "pydantic" string with in-place sed
- # -i is zero-length extension which does effectively in-place sub.
- # Can't do -i '' because Ubuntu sed is -i{suffix} whereas OSX sed is -i {suffix}... ugh
- # ^ start of line, pydantic, optional spaces and > sign, capture the version, replace with ^{version}
- # Should avoid also replacing the autodoc-pydantic spec later on.
- - name: Sed replace pydantic on repo
- run: |
- sed -i 's/^pydantic *= *">*= *\([0-9.]*\)"/pydantic = "^\1"/' pyproject.toml
- if: matrix.pydantic-version == '1'
  - name: Install repo with poetry (full deps)
  if: matrix.python-version != '3.9'
  run: poetry install --no-interaction --no-ansi --all-extras
@@ -65,10 +55,6 @@ jobs:
  python-version: "3.7"
  - name: Install poetry
  run: pip install poetry
- # Force pydantic 1.0 by modifying poetry dep "pydantic" string with in-place sed (see above for details)
- - name: Sed replace pydantic on repo
- run: |
- sed -i 's/^pydantic *= *">*= *\([0-9.]*\)"/pydantic = "^\1"/' pyproject.toml
  - name: Install repo
  run: poetry install --no-interaction --no-ansi
  - name: Build Documentation

diff --git a/pyproject.toml b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "qcelemental"
-version = "0.26.0"
+version = "0.27.0"
 description = "Core data structures for Quantum Chemistry."
 authors = ["The QCArchive Development Team <[email protected]>"]
 license = "BSD-3-Clause"
@@ -29,9 +29,10 @@ numpy = [
  { version = ">=1.12.0", python = "3.8" },
  { version = ">=1.24.1", python = ">=3.9" },
 ]
-python = "^3.7"
+python = "^3.7.1"
 pint = ">=0.10.0"
-pydantic = ">=1.8.2"
+pydantic = "^2.1.0"
+pydantic-settings = "*" # Maybe remove when Fractal merges next?
 nglview = { version = "^3.0.3", optional = true }
 ipykernel = { version = "<6.0.0", optional = true }
 importlib-metadata = { version = ">=4.8", python = "<3.8" }
@@ -62,7 +63,7 @@ docutils = "<0.19"
 sphinx = "<6.0.0"
 sphinxcontrib-napoleon = "^0.7"
 sphinx-rtd-theme = "^1.2.0"
-autodoc-pydantic = "^1.8.0"
+autodoc-pydantic = "^2.0.0"
 sphinx-automodapi = "^0.15.0"
 sphinx-autodoc-typehints = "^1.22"
 

diff --git a/qcelemental/datum.py b/qcelemental/datum.py
@@ -3,14 +3,60 @@
 """
 
 from decimal import Decimal
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 
 import numpy as np
+from pydantic import (
+ BaseModel,
+ ConfigDict,
+ SerializationInfo,
+ SerializerFunctionWrapHandler,
+ WrapSerializer,
+ field_validator,
+ model_serializer,
+)
+from typing_extensions import Annotated
+
+
+def reduce_complex(data):
+ # Reduce a complex number to a [real, imag] pair
+ if isinstance(data, complex):
+ return [data.real, data.imag]
+ # Fallback: return any non-complex value unchanged
+ return data
+
+
+def keep_decimal_cast_ndarray_complex(
+ v: Any, nxt: SerializerFunctionWrapHandler, info: SerializationInfo
+) -> Union[list, Decimal, float]:
+ """
+ Ensure Decimal types are preserved on the way out
+
+ This arose because Decimal was serialized to string and "dump" is equal to "serialize" in v2 pydantic
+ https://docs.pydantic.dev/latest/migration/#changes-to-json-schema-generation
 
-try:
- from pydantic.v1 import BaseModel, validator
-except ImportError: # Will also trap ModuleNotFoundError
- from pydantic import BaseModel, validator
+ This also handles NumPy arrays and complex numbers when serializing in JSON mode
+ """
+ if isinstance(v, Decimal):
+ return v
+ if info.mode == "json":
+ if isinstance(v, complex):
+ return nxt(reduce_complex(v))
+ if isinstance(v, np.ndarray):
+ # Handle NDArray and complex NDArray
+ flat_list = v.flatten().tolist()
+ reduced_list = list(map(reduce_complex, flat_list))
+ return nxt(reduced_list)
+ try:
+ # Cast NumPy scalar data types to native Python data type
+ v = v.item()
+ except (AttributeError, ValueError):
+ pass
+ return nxt(v)
+
+
+# Only 1 serializer is allowed. You can't chain wrap serializers.
+AnyArrayComplex = Annotated[Any, WrapSerializer(keep_decimal_cast_ndarray_complex)]
 
 
 class Datum(BaseModel):
@@ -38,15 +84,15 @@
  numeric: bool
  label: str
  units: str
- data: Any
+ data: AnyArrayComplex
  comment: str = ""
  doi: Optional[str] = None
  glossary: str = ""
 
- class Config:
- extra = "forbid"
- allow_mutation = False
-  json_encoders = {np.ndarray: lambda v: v.flatten().tolist(), complex: lambda v: (v.real, v.imag)}
+ model_config = ConfigDict(
+ extra="forbid",
+ frozen=True,
+ )
 
  def __init__(self, label, units, data, *, comment=None, doi=None, glossary=None, numeric=True):
  kwargs = {"label": label, "units": units, "data": data, "numeric": numeric}
@@ -59,20 +105,21 @@
 
  super().__init__(**kwargs)
 
- @validator("data")
- def must_be_numerical(cls, v, values, **kwargs):
+ @field_validator("data")
+ @classmethod
+ def must_be_numerical(cls, v, info):
  try:
  1.0 * v
  except TypeError:
  try:
  Decimal("1.0") * v
  except TypeError:
- if values["numeric"]:
+ if info.data["numeric"]:
  raise ValueError(f"Datum data should be float, Decimal, or np.ndarray, not {type(v)}.")
  else:
- values["numeric"] = True
+ info.data["numeric"] = True
  else:
- values["numeric"] = True
+ info.data["numeric"] = True
 
  return v
 
@@ -90,8 +137,35 @@
  text.append("-" * width)
  return "\n".join(text)
 
+ @model_serializer(mode="wrap")
+ def _serialize_model(self, handler) -> Dict[str, Any]:
+ """
+ Customize the serialization output. This duplicates some of the code in model_dump, but handles the case of nested
+ models and any model config options.
+
+ Encoding is handled at the `model_dump` level and not here as that should happen only after EVERYTHING has been
+ dumped/de-pydantic-ized.
+ """
+
+ # Get the default result and let model_dump handle its kwargs
+ default_result = handler(self)
+ # Exclude unset always
+ output_dict = {key: value for key, value in default_result.items() if key in self.model_fields_set}
+ return output_dict
+
  def dict(self, *args, **kwargs):
- return super().dict(*args, **{**kwargs, **{"exclude_unset": True}})
+ """
+ Passthrough to model_dump without deprecation warning
+ exclude_unset is forced through the model_serializer
+ """
+ return super().model_dump(*args, **kwargs)
+
+ def json(self, *args, **kwargs):
+ """
+ Passthrough to model_dump_json without deprecation warning
+ exclude_unset is forced through the model_serializer
+ """
+ return super().model_dump_json(*args, **kwargs)
 
  def to_units(self, units=None):
  from .physical_constants import constants

diff --git a/qcelemental/info/cpu_info.py b/qcelemental/info/cpu_info.py
@@ -8,10 +8,8 @@
 from functools import lru_cache
 from typing import List, Optional
 
-try:
- from pydantic.v1 import Field
-except ImportError: # Will also trap ModuleNotFoundError
- from pydantic import Field
+from pydantic import BeforeValidator, Field
+from typing_extensions import Annotated
 
 from ..models import ProtoModel
 
@@ -25,6 +23,13 @@ class VendorEnum(str, Enum):
  arm = "arm"
 
 
+def stringify(v) -> str:
+ return str(v)
+
+
+Stringify = Annotated[str, BeforeValidator(stringify)]
+
+
 class InstructionSetEnum(int, Enum):
  """Allowed instruction sets for CPUs in an ordinal enum."""
 
@@ -40,13 +45,13 @@ class ProcessorInfo(ProtoModel):
  ncores: int = Field(..., description="The number of physical cores on the chip.")
  nthreads: Optional[int] = Field(..., description="The maximum number of concurrent threads.")
  base_clock: float = Field(..., description="The base clock frequency (GHz).")
- boost_clock: Optional[float] = Field(..., description="The boost clock frequency (GHz).")
- model: str = Field(..., description="The model number of the chip.")
+ boost_clock: Optional[float] = Field(None, description="The boost clock frequency (GHz).")
+ model: Stringify = Field(..., description="The model number of the chip.")
  family: str = Field(..., description="The family of the chip.")
- launch_date: Optional[int] = Field(..., description="The launch year of the chip.")
+ launch_date: Optional[int] = Field(None, description="The launch year of the chip.")
  target_use: str = Field(..., description="Target use case (Desktop, Server, etc).")
  vendor: VendorEnum = Field(..., description="The vendor the chip is produced by.")
- microarchitecture: Optional[str] = Field(..., description="The microarchitecture the chip follows.")
+ microarchitecture: Optional[str] = Field(None, description="The microarchitecture the chip follows.")
  instructions: InstructionSetEnum = Field(..., description="The maximum vectorized instruction set available.")
  type: str = Field(..., description="The type of chip (cpu, gpu, etc).")
 

diff --git a/qcelemental/info/dft_info.py b/qcelemental/info/dft_info.py
@@ -4,10 +4,7 @@
 
 from typing import Dict
 
-try:
- from pydantic.v1 import Field
-except ImportError: # Will also trap ModuleNotFoundError
- from pydantic import Field
+from pydantic import Field
 
 from ..models import ProtoModel
 
@@ -71,4 +68,4 @@ def get(name: str) -> DFTFunctionalInfo:
  name = name.replace(x, "")
  break
 
- return dftfunctionalinfo.functionals[name].copy()
+ return dftfunctionalinfo.functionals[name].model_copy()
diff --git a/qcelemental/models/align.py b/qcelemental/models/align.py
@@ -1,14 +1,10 @@
 from typing import Optional
 
 import numpy as np
-
-try:
- from pydantic.v1 import Field, validator
-except ImportError: # Will also trap ModuleNotFoundError
- from pydantic import Field, validator
+from pydantic import Field, field_validator
 
 from ..util import blockwise_contract, blockwise_expand
-from .basemodels import ProtoModel
+from .basemodels import ExtendedConfigDict, ProtoModel
 from .types import Array
 
 __all__ = ["AlignmentMill"]
@@ -30,19 +26,20 @@ class AlignmentMill(ProtoModel):
  atommap: Optional[Array[int]] = Field(None, description="Atom exchange map (nat,) for coordinates.") # type: ignore
  mirror: bool = Field(False, description="Do mirror invert coordinates?")
 
- class Config:
- force_skip_defaults = True
+ model_config = ExtendedConfigDict(force_skip_defaults=True)
 
- @validator("shift")
- def _must_be_3(cls, v, values, **kwargs):
+ @field_validator("shift")
+ @classmethod
+ def _must_be_3(cls, v):
  try:
  v = v.reshape(3)
  except (ValueError, AttributeError):
  raise ValueError("Shift must be castable to shape (3,)!")
  return v
 
- @validator("rotation")
- def _must_be_33(cls, v, values, **kwargs):
+ @field_validator("rotation")
+ @classmethod
+ def _must_be_33(cls, v):
  try:
  v = v.reshape(3, 3)
  except (ValueError, AttributeError):