Please document off-by-one for modern NumberOfPolys

Traditional ASCII and binary legacy files are documented here and here, while examples and description of the modern XML VTK file is here. However, recent VTK code will read and write variations of the old format that are not documented but are similar to the new XML format and share the same undocumented off-by-one error.

The Python code below illustrates this, creating both a classic legacy image (v2.0) and an undocumented legacy image (v5.1).

v2.0 reports POLYGONS 1081500 when storing 1081500 polygons:

# vtk DataFile Version 2.0
File written by itkPolyDataMeshIO
ASCII
DATASET POLYDATA
POINTS 540818 float
...
POLYGONS 1081500 4326000
...

v5.1 reports POLYGONS 72005 when storing 72004 polygons:

# vtk DataFile Version 5.1
vtk output
BINARY
DATASET POLYDATA
POINTS 36004 float
...
POLYGONS 72005 216012
OFFSETS vtktypeint64
...
CONNECTIVITY vtktypeint64

The surprising thing about the XML and v5.1 version of this format is that it reports n+1 polygons. You can see this clearly in the minimal XML ASCII example where NumberOfPolys="6" which refers to the number of offsets, but there are actually 5 polygons. This is a fence-post error: the format stores the starting offset for each polygon, but stores an additional offset to encode the end of the final polygon.

Curiously, when I showed ChatGPT a solution that pragmatically worked, it assured me that this was an error, even when provided with the VTK source code link. The format is popular, and popular tools like Slicer3D (which use these libraries) correctly read and write these meshes. However, it would be great to document this unexpected behavior, especially because the definition of POLYGONS n has changed from the legacy (v2.0) format to recent offset-based variations.

import sys
import numpy as np
import itk
import pyvista as pv
from scipy.ndimage import binary_closing

from vtkmodules.vtkCommonCore import vtkPoints
from vtkmodules.vtkCommonDataModel import vtkTriangle, vtkCellArray, vtkPolyData
from vtkmodules.util.numpy_support import numpy_to_vtk


def load_image(filename):
    image = itk.imread(filename, itk.F)
    return image

def binarize_image(image, threshold=0):
    arr = itk.GetArrayFromImage(image)
    binary_arr = (arr > threshold).astype(np.uint8)
    binary_image = itk.image_from_array(binary_arr)
    binary_image.CopyInformation(image)
    return binary_image

def cuberille_to_mesh(binary_image):
    PixelType = itk.UC
    Dimension = 3
    ImageType = itk.Image[PixelType, Dimension]
    MeshType = itk.Mesh[itk.SS, Dimension]
    MeshSourceType = itk.BinaryMask3DMeshSource[ImageType, MeshType]
    mesh_filter = MeshSourceType.New()
    mesh_filter.SetInput(binary_image)
    mesh_filter.Update()
    return mesh_filter.GetOutput()

def simplify_mesh(itk_mesh, reduction=0.5):
    points = np.array([itk_mesh.GetPoint(i) for i in range(itk_mesh.GetNumberOfPoints())])

    # Fallback: assume cuberille generated triangle cells in-order
    faces = []
    for i in range(0, itk_mesh.GetNumberOfCells()):
        try:
            ids = itk_mesh.GetCell(i).GetPointIds()
            if ids.Size() == 3:
                faces.append([ids.GetId(0), ids.GetId(1), ids.GetId(2)])
        except:
            continue

    if not faces:
        raise RuntimeError("No triangles found — mesh may be empty or not in supported format")

    faces_np = np.array(faces, dtype=np.int32)
    pv_mesh = pv.PolyData(points, faces_np)
    return pv_mesh.decimate_pro(reduction)


    pv_mesh = pv.wrap(poly)
    return pv_mesh.decimate_pro(reduction)

def main(input_nii, output_path, reduction=None):
    tmp_vtk = "tmp_mesh.vtk"

    # Step 1: ITK processing and mesh writing
    image = load_image(input_nii)
    bin_img = binarize_image(image)
    closed_arr = binary_closing(itk.GetArrayFromImage(bin_img), iterations=1).astype(np.uint8)
    closed_img = itk.image_from_array(closed_arr)
    closed_img.CopyInformation(image)

    print("Generating mesh with cuberille...")
    itk_mesh = cuberille_to_mesh(closed_img)

    print(f"Saving intermediate mesh to {tmp_vtk}")
    writer = itk.MeshFileWriter[type(itk_mesh)].New()
    writer.SetInput(itk_mesh)
    writer.SetFileName(tmp_vtk)
    writer.Update()

    # Step 2: Load intermediate mesh with PyVista
    mesh = pv.read(tmp_vtk)

    if reduction:
        print(f"Simplifying mesh with reduction={reduction}")
        mesh = mesh.decimate_pro(reduction)

    print(f"Saving final mesh to {output_path}")
    mesh.save(output_path, binary=True)


if __name__ == '__main__':
    if len(sys.argv) < 3 or len(sys.argv) > 4:
        print("Usage: python nii2mesh.py input.nii.gz output.vtk [reduction (0..1)]")
        sys.exit(1)
    reduction = float(sys.argv[3]) if len(sys.argv) == 4 else None
    main(sys.argv[1], sys.argv[2], reduction)
1 Like

Hello,

You can edit the Doxygen comment of cited method : VTK/IO/Legacy/vtkDataWriter.h at 31710fe6f0a2020f320f8ff8b25cc2bac264d4d4 · Kitware/VTK · GitHub and submit a pull request.

best,

PC