More headway, starting on tests. Have to be careful with serializers …

…and only use JSON on NumPy arrays for now.
MolSSI · Lnaden · Aug 1, 2023 · Aug 3, 2023 · Aug 4, 2023 · Aug 4, 2023
commit 5bd215b293975d36cb621ded7c27624df3c5494b
diff --git a/qcelemental/datum.py b/qcelemental/datum.py
@@ -4,10 +4,28 @@
 
 from decimal import Decimal
 from typing import Any, Dict, Optional
+from typing_extensions import Annotated
 
 import numpy as np
 
-from pydantic import BaseModel, field_validator
+from pydantic import BaseModel, field_validator, ConfigDict, WrapSerializer, SerializerFunctionWrapHandler
+
+
+def cast_ndarray(v: Any, nxt: SerializerFunctionWrapHandler) -> str:
+ """Special helper to list NumPy arrays before serializing.
+
+ Wrap-serializer callback: ``v`` is the value being serialized and ``nxt``
+ is the next serialization handler. An ``np.ndarray`` is flattened and
+ converted to a plain list before being handed to ``nxt``; any other
+ value is handed to ``nxt`` unchanged.
+
+ NOTE(review): the handler's result is interpolated into an f-string, so
+ a ``str`` is returned in both branches -- confirm that is intended.
+ """
+ if isinstance(v, np.ndarray):
+ return f'{nxt(v.flatten().tolist())}'
+ return f'{nxt(v)}'
+
+
+def cast_complex(v: Any, nxt: SerializerFunctionWrapHandler) -> str:
+ """Special helper to split complex numbers into (real, imag) before serializing.
+
+ Wrap-serializer callback: ``v`` is the value being serialized and ``nxt``
+ is the next serialization handler. A ``complex`` is replaced by the tuple
+ ``(v.real, v.imag)`` before being handed to ``nxt``; any other value is
+ handed to ``nxt`` unchanged.
+
+ NOTE(review): the handler's result is interpolated into an f-string, so
+ a ``str`` is returned in both branches -- confirm that is intended.
+ """
+ if isinstance(v, complex):
+ return f'{nxt((v.real, v.imag))}'
+ return f'{nxt(v)}'
+
+
+# ``Any`` annotated with the ndarray and complex wrap serializers defined above.
+# NOTE(review): confirm how pydantic combines two WrapSerializer annotations
+# on a single type (whether both run or the later one takes precedence).
+AnyArrayComplex = Annotated[Any, WrapSerializer(cast_ndarray), WrapSerializer(cast_complex)]
 
 
 class Datum(BaseModel):
@@ -35,15 +53,14 @@ class Datum(BaseModel):
  numeric: bool
  label: str
  units: str
- data: Any
+ data: AnyArrayComplex
  comment: str = ""
  doi: Optional[str] = None
  glossary: str = ""
 
- class Config:
- extra = "forbid"
- allow_mutation = False
- json_encoders = {np.ndarray: lambda v: v.flatten().tolist(), complex: lambda v: (v.real, v.imag)}
+ model_config = ConfigDict(extra="forbid",
+ frozen=True,
+ )
 
  def __init__(self, label, units, data, *, comment=None, doi=None, glossary=None, numeric=True):
  kwargs = {"label": label, "units": units, "data": data, "numeric": numeric}
@@ -89,7 +106,7 @@ def __str__(self, label=""):
  return "\n".join(text)
 
  def dict(self, *args, **kwargs):
- return super().dict(*args, **{**kwargs, **{"exclude_unset": True}})
+ return super().model_dump(*args, **{**kwargs, **{"exclude_unset": True}})
 
  def to_units(self, units=None):
  from .physical_constants import constants

diff --git a/qcelemental/info/cpu_info.py b/qcelemental/info/cpu_info.py
@@ -7,8 +7,9 @@
 from enum import Enum
 from functools import lru_cache
 from typing import List, Optional
+from typing_extensions import Annotated
 
-from pydantic import Field
+from pydantic import Field, BeforeValidator
 
 from ..models import ProtoModel
 
@@ -22,6 +23,13 @@ class VendorEnum(str, Enum):
  arm = "arm"
 
 
+def stringify(v) -> str:
+ """Return ``str(v)``; used as the before-validator behind ``Stringify``."""
+ return str(v)
+
+
+# ``str`` field type that first passes the raw input through ``stringify``.
+Stringify = Annotated[str, BeforeValidator(stringify)]
+
+
 class InstructionSetEnum(int, Enum):
  """Allowed instruction sets for CPUs in an ordinal enum."""
 
@@ -38,7 +46,7 @@ class ProcessorInfo(ProtoModel):
  nthreads: Optional[int] = Field(..., description="The maximum number of concurrent threads.")
  base_clock: float = Field(..., description="The base clock frequency (GHz).")
  boost_clock: Optional[float] = Field(None, description="The boost clock frequency (GHz).")
- model: str = Field(..., description="The model number of the chip.")
+ model: Stringify = Field(..., description="The model number of the chip.")
  family: str = Field(..., description="The family of the chip.")
  launch_date: Optional[int] = Field(None, description="The launch year of the chip.")
  target_use: str = Field(..., description="Target use case (Desktop, Server, etc).")

diff --git a/qcelemental/models/align.py b/qcelemental/models/align.py
@@ -5,7 +5,7 @@
 from pydantic import Field, field_validator
 
 from ..util import blockwise_contract, blockwise_expand
-from .basemodels import ProtoModel
+from .basemodels import ProtoModel, ExtendedConfigDict
 from .types import Array
 
 __all__ = ["AlignmentMill"]
@@ -27,8 +27,7 @@ class AlignmentMill(ProtoModel):
  atommap: Optional[Array[int]] = Field(None, description="Atom exchange map (nat,) for coordinates.") # type: ignore
  mirror: bool = Field(False, description="Do mirror invert coordinates?")
 
- class Config:
- force_skip_defaults = True
+ model_config = ExtendedConfigDict(force_skip_defaults=True)
 
  @field_validator("shift")
  @classmethod

diff --git a/qcelemental/models/basemodels.py b/qcelemental/models/basemodels.py
@@ -138,9 +138,9 @@ def model_dump(self, **kwargs) -> Dict[str, Any]:
 
  kwargs["exclude"] = (
  kwargs.get("exclude", None) or set()
- ) | self.__config__.serialize_default_excludes # type: ignore
- kwargs.setdefault("exclude_unset", self.__config__.serialize_skip_defaults) # type: ignore
- if self.__config__.force_skip_defaults: # type: ignore
+ ) | self.model_config["serialize_default_excludes"] # type: ignore
+ kwargs.setdefault("exclude_unset", self.model_config["serialize_skip_defaults"]) # type: ignore
+ if self.model_config["force_skip_defaults"]: # type: ignore
  kwargs["exclude_unset"] = True
 
  data = super().model_dump(**kwargs)

diff --git a/qcelemental/models/basis.py b/qcelemental/models/basis.py
@@ -38,7 +38,7 @@ class ElectronShell(ProtoModel):
  ...,
  description="General contraction coefficients for the shell; "
  "individual list components will be the individual segment contraction coefficients.",
- min_items=1,
+ min_length=1,
  )
 
  model_config = ExtendedConfigDict(json_schema_extra=electron_shell_json_schema_extra,
@@ -111,15 +111,15 @@ class ECPPotential(ProtoModel):
 
  ecp_type: ECPType = Field(..., description=str(ECPType.__doc__))
  angular_momentum: List[NonnegativeInt] = Field(
- ..., description="Angular momentum for the potential as an array of integers.", min_items=1
+ ..., description="Angular momentum for the potential as an array of integers.", min_length=1
  )
- r_exponents: List[int] = Field(..., description="Exponents of the 'r' term.", min_items=1)
- gaussian_exponents: List[float] = Field(..., description="Exponents of the 'gaussian' term.", min_items=1)
+ r_exponents: List[int] = Field(..., description="Exponents of the 'r' term.", min_length=1)
+ gaussian_exponents: List[float] = Field(..., description="Exponents of the 'gaussian' term.", min_length=1)
  coefficients: List[List[float]] = Field(
  ...,
  description="General contraction coefficients for the potential; "
  "individual list components will be the individual segment contraction coefficients.",
- min_items=1,
+ min_length=1,
  )
 
  model_config = ExtendedConfigDict(json_schema_extra=ecp_json_schema_extra,
@@ -163,6 +163,10 @@ class BasisCenter(ProtoModel):
  **ProtoModel.model_config)
 
 
+def basis_set_json_schema_extra(schema, model):
+ """Set ``schema["$schema"]`` to ``qcschema_draft`` in place; ``model`` is not used."""
+ schema["$schema"] = qcschema_draft
+
+
 class BasisSet(ProtoModel):
  """
  A quantum chemistry basis description.
@@ -191,9 +195,9 @@ class BasisSet(ProtoModel):
  description="The number of basis functions. Use for convenience or as checksum",
  validate_default=True)
 
- class Config(ProtoModel.Config):
- def schema_extra(schema, model):
- schema["$schema"] = qcschema_draft
+ model_config = ExtendedConfigDict(**ProtoModel.model_config,
+  json_schema_extra=basis_set_json_schema_extra
+  )
 
  @field_validator("atom_map")
  @classmethod

diff --git a/qcelemental/models/common_models.py b/qcelemental/models/common_models.py
@@ -31,10 +31,12 @@ class Provenance(ProtoModel):
  )
  routine: str = Field("", description="The name of the routine or function within the creator, blank otherwise.")
 
- model_config = ExtendedConfigDict(canonical_repr=True,
- json_schema_extra=provenance_json_schema_extra,
- **ProtoModel.model_config,
- extra="allow")
+ model_config = ExtendedConfigDict(**{**ProtoModel.model_config,
+ **ExtendedConfigDict(canonical_repr=True,
+ json_schema_extra=provenance_json_schema_extra,
+ extra="allow")
+ }
+ )
 
 
 class Model(ProtoModel):
@@ -52,9 +54,11 @@ class Model(ProtoModel):
  )
 
  # basis_spec: BasisSpec = None # This should be exclusive with basis, but for now will be omitted
- model_config = ExtendedConfigDict(canonical_repr=True,
- **ProtoModel.model_config,
- extra="allow")
+ model_config = ExtendedConfigDict(**{**ProtoModel.model_config,
+ **ExtendedConfigDict(canonical_repr=True,
+ extra="allow")
+ }
+ )
 
 
 class DriverEnum(str, Enum):

diff --git a/qcelemental/models/molecule.py b/qcelemental/models/molecule.py
@@ -87,8 +87,9 @@ class Identifiers(ProtoModel):
  pubchem_sid: Optional[str] = Field(None, description="PubChem Substance ID")
  pubchem_conformerid: Optional[str] = Field(None, description="PubChem Conformer ID")
 
- model_config = ExtendedConfigDict(**ProtoModel.model_config,
- serialize_skip_defaults=True
+ model_config = ExtendedConfigDict(**{**ProtoModel.model_config,
+ **ExtendedConfigDict(serialize_skip_defaults=True)
+ }
  )
 
 
@@ -342,12 +343,15 @@ class Molecule(ProtoModel):
  description="Additional information to bundle with the molecule. Use for schema development and scratch space.",
  )
 
- model_config = ExtendedConfigDict(**ProtoModel.model_config,
- serialize_skip_defaults=True,
- repr_style=lambda self: [("name", self.name),
- ("formula", self.get_molecular_formula()),
- ("hash", self.get_hash()[:7])],
- json_schema_extra= molecule_json_schema_extras
+ model_config = ExtendedConfigDict(**{**ProtoModel.model_config,
+ **ExtendedConfigDict(serialize_skip_defaults=True,
+ repr_style=lambda self: [("name", self.name),
+ ("formula",
+ self.get_molecular_formula()),
+ ("hash", self.get_hash()[:7])],
+ json_schema_extra=molecule_json_schema_extras
+ )
+ }
  )
  # Alias fields are handled with the Field objects above
 

diff --git a/qcelemental/models/procedures.py b/qcelemental/models/procedures.py
@@ -15,6 +15,7 @@
  qcschema_optimization_output_default,
  qcschema_torsion_drive_input_default,
  qcschema_torsion_drive_output_default,
+ ExtendedConfigDict,
 )
 from .molecule import Molecule
 from .results import AtomicResult
@@ -43,8 +44,7 @@ class OptimizationProtocols(ProtoModel):
  TrajectoryProtocolEnum.all, description=str(TrajectoryProtocolEnum.__doc__)
  )
 
- class Config:
- force_skip_defaults = True
+ model_config = ExtendedConfigDict(force_skip_defaults=True)
 
 
 class QCInputSpecification(ProtoModel):