legacy file format: data type and version

VTK 9.6.0 has a weird change behavior when it comes to the legacy file format: The data type for OFFSETS and CONNECTIVITY of POLYDATA is vtktypeint64 after creation (using the Python bindings) but changes to long when saving+reading to VTKHDF. When writing without the detour via VTKHDF, the file has a mixture of C-types (double) and VTK-types (vtktypeint64), with the 9.6.0 change this is more consistent.

My understanding is that the VTK-specific types such as vtktypeint64 are fixed while long etc. can differ between C libraries/operating systems (see, e.g., https://stackoverflow.com/questions/63938192), so VTK-types seem to be the better choice.

Simple Legacy Formats - VTK documentation does not mention vtktypeint64 as a valid dataType. But it also states that the current version is 3.0 while VTK 9.5.2 and 9.6.0 write version 5.1. The link in VTK Legacy File Format Specification does not work anymore.

My questions would be

  • Which datatypes are supported?
  • Is there a preferred variant, e.g. vtktypeint64 or long?
  • Can I contribute to the documentation?

Can you share steps to reproduce that highlight the regression ?

@Louis_Gombert @lgivord

@MarDiehl a reproducer would be really appriciated.

here is a reproducer. It’s from a larger code base and a few things can be stripped down but I think it’s easy to follow.

#! /usr/bin/env python3

import os
import multiprocessing as mp
import logging
import contextlib
from pathlib import Path
from typing import Optional, Union, Literal, Sequence

import numpy as np

# needed for visualization but might not be available everywhere
# https://gitlab.kitware.com/vtk/vtk/-/issues/19687
with contextlib.suppress(ImportError):
    import vtkmodules.vtkRenderingOpenGL2                                                           # noqa

from vtkmodules.vtkCommonCore import (
    vtkVersion,
    vtkPoints,
    vtkStringArray,
    vtkLookupTable,
)

if has_vtkhdf := (np.lib.NumpyVersion(vtkVersion.GetVTKVersion()) >= '9.4.0'):
    from vtkmodules.vtkIOHDF import vtkHDFReader, vtkHDFWriter

from vtkmodules.vtkCommonDataModel import (
    vtkDataSet,
    vtkCellArray,
    vtkImageData,
    vtkRectilinearGrid,
    vtkUnstructuredGrid,
    vtkPointData,
    vtkCellData,
    vtkPolyData,
    VTK_LAGRANGE_TRIANGLE,
    VTK_LAGRANGE_QUADRILATERAL,
    VTK_LAGRANGE_TETRAHEDRON,
    VTK_LAGRANGE_HEXAHEDRON,
)
from vtkmodules.vtkIOLegacy import (
    vtkGenericDataObjectReader,
    vtkDataSetWriter,
)
from vtkmodules.vtkIOXML import (
    vtkXMLWriter,
    vtkXMLImageDataReader,
    vtkXMLImageDataWriter,
    vtkXMLRectilinearGridReader,
    vtkXMLRectilinearGridWriter,
    vtkXMLUnstructuredGridReader,
    vtkXMLUnstructuredGridWriter,
    vtkXMLPolyDataReader,
    vtkXMLPolyDataWriter,
)
from vtkmodules.vtkRenderingCore import (
    vtkDataSetMapper,
    vtkActor,
    vtkRenderer,
    vtkRenderWindow,
    vtkRenderWindowInteractor,
)
from vtkmodules.vtkRenderingAnnotation import (
    vtkScalarBarActor,
)
from vtkmodules.util.numpy_support import (
    numpy_to_vtk,
    numpy_to_vtkIdTypeArray,
    vtk_to_numpy,
)

class VTK:
    """
    Spatial visualization (and potentially manipulation).

    High-level interface to VTK.
    """

    def __init__(self,
                 vtk_data: vtkDataSet):
        """
        New spatial visualization.

        Parameters
        ----------
        vtk_data : subclass of vtkDataSet
            Description of geometry and topology, optionally with attached data.
            Valid types are vtkImageData, vtkUnstructuredGrid,
            vtkPolyData, and vtkRectilinearGrid.
        """
        self.vtk_data: vtkDataSet = vtk_data


    @staticmethod
    def from_poly_data(points: np.ndarray) -> 'VTK':
        """
        Create VTK of type polyData.

        This is the common type for point-wise data.

        Parameters
        ----------
        points : numpy.ndarray, shape (:,3)
            Spatial position of the points.

        Returns
        -------
        new : damask.VTK
            VTK-based geometry without nodal or cell data.
        """
        N = points.shape[0]
        vtk_points = vtkPoints()
        vtk_points.SetData(numpy_to_vtk(np.ascontiguousarray(points)))

        vtk_cells = vtkCellArray()
        vtk_cells.SetNumberOfCells(N)
        vtk_cells.SetCells(N,numpy_to_vtkIdTypeArray(np.stack((np.ones  (N,dtype=np.int64),
                                                               np.arange(N,dtype=np.int64)),axis=1).ravel(),deep=True))

        vtk_data = vtkPolyData()
        vtk_data.SetPoints(vtk_points)
        vtk_data.SetVerts(vtk_cells)

        return VTK(vtk_data)


    @staticmethod
    def load(fname: Union[str, Path],
             dataset_type: Literal[None, 'ImageData', 'UnstructuredGrid', 'PolyData', 'RectilinearGrid'] = None) -> 'VTK':
        """
        Load from VTK file.

        Parameters
        ----------
        fname : str or pathlib.Path
            Filename to read.
            Valid extensions are .vti, .vtu, .vtp, .vtr, vtkhdf, and .vtk.
        dataset_type : {'ImageData', 'UnstructuredGrid', 'PolyData', 'RectilinearGrid'}, optional
            Name of the vtkDataSet subclass when opening a .vtk file.

        Returns
        -------
        loaded : damask.VTK
            VTK-based geometry from file.

        Notes
        -----
        Loading VTKHDF files requires VTK 9.4 or later and presently supports
        PolyData, UnstructuredGrid, and ImageData. Loading ImageData is untested
        because VTK does not yet provide the functionality to write ImageData
        into VTKHDF format.
        """
        if not Path(fname).expanduser().is_file():                                                  # vtk has a strange error handling
            raise FileNotFoundError(f'file "{fname}" not found')

        if (ext := Path(fname).suffix) == '.vtk' or dataset_type is not None:
            reader_legacy = vtkGenericDataObjectReader()
            reader_legacy.SetFileName(str(Path(fname).expanduser()))
            reader_legacy.Update()
            if dataset_type is None:
                raise TypeError('missing dataset type for legacy VTK file')
            dtl = dataset_type.lower()
            vtk_data = (
                reader_legacy.GetStructuredPointsOutput() if dtl.endswith(('imagedata', 'image_data')) else
                reader_legacy.GetUnstructuredGridOutput() if dtl.endswith(('unstructuredgrid', 'unstructured_grid')) else
                reader_legacy.GetPolyDataOutput() if dtl.endswith(('polydata', 'poly_data')) else
                reader_legacy.GetRectilinearGridOutput() if dtl.endswith(('rectilineargrid', 'rectilinear_grid')) else
                None
            )
            if vtk_data is None:
                raise TypeError(f'unsupported VTK dataset type "{dataset_type}"')
        else:
            reader = (
                vtkXMLImageDataReader() if ext == '.vti' else
                vtkXMLUnstructuredGridReader() if ext == '.vtu' else
                vtkXMLPolyDataReader() if ext == '.vtp' else
                vtkXMLRectilinearGridReader() if ext == '.vtr' else
                vtkHDFReader() if (ext == '.vtkhdf') else
                None
            )
            if reader is None:
                raise TypeError(f'unsupported VTK file extension "{ext}"')
            reader.SetFileName(str(Path(fname).expanduser()))
            reader.Update()
            vtk_data = reader.GetOutputAsDataSet()

        return VTK(vtk_data)

    def as_ASCII(self) -> str:
        """ASCII representation of the VTK data."""
        writer = vtkDataSetWriter()
        writer.WriteToOutputStringOn()
        writer.SetInputData(self.vtk_data)
        writer.Write()
        return writer.GetOutputString()


    def save_VTKHDF(self,
                    fname: Union[str, Path]):
        """
        Save as VTKHDF file.

        Parameters
        ----------
        fname : str or pathlib.Path
            Filename to write.

        Notes
        -----
        Saving as VTKHDF file requires VTK 9.4 or later and only supports
        PolyData and UnstructuredGrid.
        """
        if not isinstance(self.vtk_data, (vtkPolyData, vtkUnstructuredGrid)):
            raise TypeError(f'unsupported vtk_data type "{type(self.vtk_data)}"')

        writer = vtkHDFWriter()
        ext = Path(fname).suffix
        writer.SetFileName(str(Path(fname).expanduser())+('.vtkhdf' if '.vtkhdf' != ext else ''))
        writer.SetInputData(self.vtk_data)
        writer.Write()


rng = np.random.default_rng(20191102)

points = rng.random((100,3))
v = VTK.from_poly_data(points)
v.save_VTKHDF('polyData')
vtkhdf = VTK.load('polyData.vtkhdf')

print(v.as_ASCII())
print(vtkhdf.as_ASCII())

assert (v.as_ASCII() == vtkhdf.as_ASCII())

Could you just share the .vtk file that exhibit the behavior when save into VTKHDF ?

attached are the two files (writing v.as_ASCII() and vtkhdf.as_ASCII()) to file).

diff polyData_detour.vtk polyData_orig.vtk gives:

41c41
< OFFSETS long
---
> OFFSETS vtktypeint64
54c54
< CONNECTIVITY long
---
> CONNECTIVITY vtktypeint64

polyData_detour.vtk (4.9 KB)
polyData_orig.vtk (4.9 KB)

Confirmed. the vtkHDFReader seems to have an impact here.

@Louis_Gombert @lgivord wdyt ?