Defined Formats

IEEE 754 Formats

gfloat.formats.format_info_binary16 = FormatInfo(name='binary16', k=16, precision=11, emax=15, has_nz=True, has_infs=True, num_high_nans=1023, has_subnormals=True, is_signed=True, is_twos_complement=False)

FormatInfo for IEEE-754 Binary16 format

gfloat.formats.format_info_binary32 = FormatInfo(name='binary32', k=32, precision=24, emax=127, has_nz=True, has_infs=True, num_high_nans=8388607, has_subnormals=True, is_signed=True, is_twos_complement=False)

FormatInfo for IEEE-754 Binary32 format

gfloat.formats.format_info_binary64 = FormatInfo(name='binary64', k=64, precision=53, emax=1023, has_nz=True, has_infs=True, num_high_nans=4503599627370495, has_subnormals=True, is_signed=True, is_twos_complement=False)

FormatInfo for IEEE-754 Binary64 format

BFloat16

gfloat.formats.format_info_bfloat16 = FormatInfo(name='bfloat16', k=16, precision=8, emax=127, has_nz=True, has_infs=True, num_high_nans=127, has_subnormals=True, is_signed=True, is_twos_complement=False)

FormatInfo for Google BFloat16 format

Open Compute Project (OCP) Formats

gfloat.formats.format_info_ocp_e5m2 = FormatInfo(name='ocp_e5m2', k=8, precision=3, emax=15, has_nz=True, has_infs=True, num_high_nans=3, has_subnormals=True, is_signed=True, is_twos_complement=False)

FormatInfo for OCP E5M2 format

gfloat.formats.format_info_ocp_e4m3 = FormatInfo(name='ocp_e4m3', k=8, precision=4, emax=8, has_nz=True, has_infs=False, num_high_nans=1, has_subnormals=True, is_signed=True, is_twos_complement=False)

FormatInfo for OCP E4M3 format

gfloat.formats.format_info_ocp_e3m2 = FormatInfo(name='ocp_e3m2', k=6, precision=3, emax=4, has_nz=True, has_infs=False, num_high_nans=0, has_subnormals=True, is_signed=True, is_twos_complement=False)

FormatInfo for OCP MX E3M2 format

gfloat.formats.format_info_ocp_e2m3 = FormatInfo(name='ocp_e2m3', k=6, precision=4, emax=2, has_nz=True, has_infs=False, num_high_nans=0, has_subnormals=True, is_signed=True, is_twos_complement=False)

FormatInfo for OCP MX E2M3 format

gfloat.formats.format_info_ocp_e2m1 = FormatInfo(name='ocp_e2m1', k=4, precision=2, emax=2, has_nz=True, has_infs=False, num_high_nans=0, has_subnormals=True, is_signed=True, is_twos_complement=False)

FormatInfo for OCP MX E2M1 format

gfloat.formats.format_info_ocp_e8m0 = FormatInfo(name='ocp_e8m0', k=8, precision=1, emax=127, has_nz=False, has_infs=False, num_high_nans=1, has_subnormals=False, is_signed=False, is_twos_complement=False)

FormatInfo for OCP MX E8M0 format

gfloat.formats.format_info_ocp_int8 = FormatInfo(name='ocp_int8', k=8, precision=8, emax=0, has_nz=False, has_infs=False, num_high_nans=0, has_subnormals=True, is_signed=True, is_twos_complement=True)

FormatInfo for OCP MX INT8 format

IEEE WG P3109 Formats

gfloat.formats.format_info_p3109(precision)[source]

FormatInfo for P3109 P{p} formats

Parameters:

precision (int) – Precision in bits

Returns:

FormatInfo class describing the format

Raises:

ValueError – If precision is not in 1..7

Block Formats

gfloat.formats.format_info_mxfp8_e5m2 = BlockFormatInfo(name='mxfp8_e5m2', etype=FormatInfo(name='ocp_e5m2', k=8, precision=3, emax=15, has_nz=True, has_infs=True, num_high_nans=3, has_subnormals=True, is_signed=True, is_twos_complement=False), k=32, stype=FormatInfo(name='ocp_e8m0', k=8, precision=1, emax=127, has_nz=False, has_infs=False, num_high_nans=1, has_subnormals=False, is_signed=False, is_twos_complement=False))

Type:

BlockFormatInfo(name: str, etype: gfloat.types.FormatInfo, k: int, stype: gfloat.types.FormatInfo)

gfloat.formats.format_info_mxfp8_e4m3 = BlockFormatInfo(name='mxfp8_e4m3', etype=FormatInfo(name='ocp_e4m3', k=8, precision=4, emax=8, has_nz=True, has_infs=False, num_high_nans=1, has_subnormals=True, is_signed=True, is_twos_complement=False), k=32, stype=FormatInfo(name='ocp_e8m0', k=8, precision=1, emax=127, has_nz=False, has_infs=False, num_high_nans=1, has_subnormals=False, is_signed=False, is_twos_complement=False))

Type:

BlockFormatInfo(name: str, etype: gfloat.types.FormatInfo, k: int, stype: gfloat.types.FormatInfo)

gfloat.formats.format_info_mxfp6_e3m2 = BlockFormatInfo(name='mxfp6_e3m2', etype=FormatInfo(name='ocp_e3m2', k=6, precision=3, emax=4, has_nz=True, has_infs=False, num_high_nans=0, has_subnormals=True, is_signed=True, is_twos_complement=False), k=32, stype=FormatInfo(name='ocp_e8m0', k=8, precision=1, emax=127, has_nz=False, has_infs=False, num_high_nans=1, has_subnormals=False, is_signed=False, is_twos_complement=False))

Type:

BlockFormatInfo(name: str, etype: gfloat.types.FormatInfo, k: int, stype: gfloat.types.FormatInfo)

gfloat.formats.format_info_mxfp6_e2m3 = BlockFormatInfo(name='mxfp6_e2m3', etype=FormatInfo(name='ocp_e2m3', k=6, precision=4, emax=2, has_nz=True, has_infs=False, num_high_nans=0, has_subnormals=True, is_signed=True, is_twos_complement=False), k=32, stype=FormatInfo(name='ocp_e8m0', k=8, precision=1, emax=127, has_nz=False, has_infs=False, num_high_nans=1, has_subnormals=False, is_signed=False, is_twos_complement=False))

Type:

BlockFormatInfo(name: str, etype: gfloat.types.FormatInfo, k: int, stype: gfloat.types.FormatInfo)

gfloat.formats.format_info_mxfp4_e2m1 = BlockFormatInfo(name='mxfp4_e2m1', etype=FormatInfo(name='ocp_e2m1', k=4, precision=2, emax=2, has_nz=True, has_infs=False, num_high_nans=0, has_subnormals=True, is_signed=True, is_twos_complement=False), k=32, stype=FormatInfo(name='ocp_e8m0', k=8, precision=1, emax=127, has_nz=False, has_infs=False, num_high_nans=1, has_subnormals=False, is_signed=False, is_twos_complement=False))

Type:

BlockFormatInfo(name: str, etype: gfloat.types.FormatInfo, k: int, stype: gfloat.types.FormatInfo)

gfloat.formats.format_info_mxint8 = BlockFormatInfo(name='mxint8', etype=FormatInfo(name='ocp_int8', k=8, precision=8, emax=0, has_nz=False, has_infs=False, num_high_nans=0, has_subnormals=True, is_signed=True, is_twos_complement=True), k=32, stype=FormatInfo(name='ocp_e8m0', k=8, precision=1, emax=127, has_nz=False, has_infs=False, num_high_nans=1, has_subnormals=False, is_signed=False, is_twos_complement=False))

Type:

BlockFormatInfo(name: str, etype: gfloat.types.FormatInfo, k: int, stype: gfloat.types.FormatInfo)