theochem · tovrstra · Mar 24, 2021
diff --git a/iodata/formats/json.py b/iodata/formats/json.py
@@ -39,7 +39,7 @@
 import numpy as np
 
 from ..docstrings import document_dump_one, document_load_one
-from ..iodata import IOData
+from ..iodata import IOData, BOND_DTYPE
 from ..periodic import num2sym, sym2num
 from ..utils import FileFormatError, FileFormatWarning, LineIterator
 from ..version import __version__
@@ -314,7 +314,8 @@ def _parse_topology_keys(mol: dict, lit: LineIterator) -> dict:
  # Note: The QCSchema spec allows for non-integer bond_orders, these are forced to integers here
  # in accordance with current IOData specification
  if "connectivity" in mol:
- topology_dict["bonds"] = np.array(mol["connectivity"], dtype=int)
+ topology_dict["bonds"] = np.array([
+ tuple(bond) for bond in mol["connectivity"]], dtype=BOND_DTYPE)
  # Check for fragment keys
  # List fragment indices in nested list (likely is a jagged array)
  if "fragments" in mol:
@@ -565,7 +566,10 @@ def _dump_qcschema_molecule(data: IOData) -> dict:
  if data.atmasses is not None:
  molecule_dict["masses"] = data.atmasses.tolist()
  if data.bonds is not None:
- molecule_dict["connectivity"] = [[int(i) for i in bond] for bond in data.bonds]
+ molecule_dict["connectivity"] = [
+ [int(bond["i"]), int(bond["j"]), float(bond["bo"])]
+ for bond in data.bonds
+ ]
  if data.g_rot:
  molecule_dict["fix_symmetry"] = data.g_rot
 

diff --git a/iodata/formats/mol2.py b/iodata/formats/mol2.py
@@ -29,7 +29,7 @@
 
 from ..docstrings import (document_load_one, document_load_many, document_dump_one,
  document_dump_many)
-from ..iodata import IOData
+from ..iodata import IOData, BOND_DTYPE
 from ..periodic import sym2num, num2sym
 from ..utils import angstrom, LineIterator
 
@@ -110,17 +110,13 @@ def _load_helper_atoms(lit: LineIterator, natoms: int)\
 
 def _load_helper_bonds(lit: LineIterator, nbonds: int) -> Tuple[np.ndarray]:
  """Load bond information."""
- bonds = np.empty((nbonds, 3))
+ bonds = np.empty(nbonds, dtype=BOND_DTYPE)
  for i in range(nbonds):
  words = next(lit).split()
  if words[3] == 'am':
- words[3] = 4
+ words[3] = 1.5
  # Substract one because of numbering starting at 0
- bond = [int(words[1]) - 1, int(words[2]) - 1, int(words[3])]
- if bond is None:
- bond = [0, 0, 0]
- lit.warn(f'Something wrong in the bond section: {bond}')
- bonds[i] = bond
+ bonds[i] = int(words[1]) - 1, int(words[2]) - 1, float(words[3])
  return bonds
 
 
@@ -163,8 +159,8 @@ def dump_one(f: TextIO, data: IOData):
  if data.bonds is not None:
  print("@<TRIPOS>BOND", file=f)
  for i, bond in enumerate(data.bonds):
- bondtype = 'am' if bond[2] == 4 else str(bond[2])
- print(f'{i+1:6d} {bond[0]+1:4d} {bond[1]+1:4d} {bondtype:2s}',
+ bondtype = 'am' if 1.4 < bond["bo"] < 1.6 else str(int(np.round(bond[2])))
+ print(f'{i+1:6d} {bond["i"]+1:4d} {bond["j"]+1:4d} {bondtype:2s}',
  file=f)
 
 

diff --git a/iodata/iodata.py b/iodata/iodata.py
@@ -31,6 +31,9 @@
 __all__ = ['IOData']
 
 
+BOND_DTYPE = np.dtype([("i", int), ("j", int), ("bo", float)])
+
+
 # pylint: disable=too-many-instance-attributes
 @attr.s(auto_attribs=True, slots=True,
  on_setattr=[attr.setters.validate, attr.setters.convert])
@@ -78,10 +81,10 @@ class IOData:
  specific atom in a molecule). The format of the values is to be decided
  when implementing a load function for basis set definitions.
  bonds
- An (nbond, 3) array with the list of covalent bonds. Each row represents
- one bond and consists of three integers: first atom index (starting
- from zero), second atom index & an optional bond type (0: not known, 1:
- single, 2: double, 3: triple, 4: conjugated).
+ An (nbond, ) array of dtype BOND_DTYPE, with the list of covalent bonds.
+ Each element represents one bond and consists of two integers and a
+ float: first atom index (starting from zero), second atom index & a
+ optional bond order. The default value of the bond order is 1.0.
  cellvecs
  A (NP, 3) array containing the (real-space) cell vectors describing
  periodic boundary conditions. A single vector corresponds to a 1D cell,
@@ -195,8 +198,8 @@ class IOData:
  validator=attr.validators.optional(validate_shape('natom')))
  basisdef: str = None
  bonds: np.ndarray = attr.ib(
- default=None, converter=convert_array_to(int),
- validator=attr.validators.optional(validate_shape(None, 3)))
+ default=None, converter=convert_array_to(BOND_DTYPE),
+ validator=attr.validators.optional(validate_shape(None,)))
  cellvecs: np.ndarray = attr.ib(
  default=None, converter=convert_array_to(float),
  validator=attr.validators.optional(validate_shape(None, 3)))

diff --git a/iodata/test/test_mol2.py b/iodata/test/test_mol2.py
@@ -20,11 +20,13 @@
 
 import os
 
+import numpy as np
 import pytest
 from numpy.testing import assert_equal, assert_allclose
 
 from .common import truncated_file
 from ..api import load_one, load_many, dump_one, dump_many
+from ..iodata import BOND_DTYPE
 from ..utils import angstrom
 try:
  from importlib_resources import path
@@ -71,8 +73,11 @@ def check_example(mol):
  assert_allclose(mol.atcharges['mol2charges'][23], 0.0949)
  bonds = mol.bonds
  assert_equal(len(bonds), 25)
- assert_allclose(bonds[0], [0, 1, 1])
- assert_allclose(bonds[24], [13, 23, 1])
+ print(bonds[0])
+ assert bonds[0] == np.array((0, 1, 1.), BOND_DTYPE)
+ assert bonds[6] == np.array((2, 3, 2.0), BOND_DTYPE)
+ assert bonds[13] == np.array((6, 8, 1.5), BOND_DTYPE)
+ assert bonds[24] == np.array((13, 23, 1.0), BOND_DTYPE)
 
 
 def check_load_dump_consistency(tmpdir, fn):