如下为 table manifest 中列出的数据文件和删除文件的接口。DataFile 接口重写(override)了两个函数:content 函数返回文件中存储的内容类型,固定为 DATA;equalityFieldIds 函数直接返回 null。DeleteFile 接口重写(override)了 splitOffsets 函数,返回 null。
/**
 * Interface for data files listed in a table manifest.
 *
 * <p>Declares the schema fields (ID, name, type, and doc string) that make up the struct built by
 * {@link #getType(StructType)}, and supplies default implementations of {@code content()} and
 * {@code equalityFieldIds()}.
 */
public interface DataFile extends ContentFile<DataFile> {
  // fields for adding delete data files
  Types.NestedField CONTENT =
      optional(
          134,
          "content",
          IntegerType.get(),
          "Contents of the file: 0=data, 1=position deletes, 2=equality deletes");
  Types.NestedField FILE_PATH =
      required(100, "file_path", StringType.get(), "Location URI with FS scheme");
  Types.NestedField FILE_FORMAT =
      required(101, "file_format", StringType.get(), "File format name: avro, orc, or parquet");
  Types.NestedField RECORD_COUNT =
      required(103, "record_count", LongType.get(), "Number of records in the file");
  Types.NestedField FILE_SIZE =
      required(104, "file_size_in_bytes", LongType.get(), "Total file size in bytes");

  // Per-column statistics, each keyed by column id.
  Types.NestedField COLUMN_SIZES =
      optional(
          108,
          "column_sizes",
          MapType.ofRequired(117, 118, IntegerType.get(), LongType.get()),
          "Map of column id to total size on disk");
  Types.NestedField VALUE_COUNTS =
      optional(
          109,
          "value_counts",
          MapType.ofRequired(119, 120, IntegerType.get(), LongType.get()),
          "Map of column id to total count, including null and NaN");
  Types.NestedField NULL_VALUE_COUNTS =
      optional(
          110,
          "null_value_counts",
          MapType.ofRequired(121, 122, IntegerType.get(), LongType.get()),
          "Map of column id to null value count");
  Types.NestedField NAN_VALUE_COUNTS =
      optional(
          137,
          "nan_value_counts",
          MapType.ofRequired(138, 139, IntegerType.get(), LongType.get()),
          "Map of column id to number of NaN values in the column");
  Types.NestedField LOWER_BOUNDS =
      optional(
          125,
          "lower_bounds",
          MapType.ofRequired(126, 127, IntegerType.get(), BinaryType.get()),
          "Map of column id to lower bound");
  Types.NestedField UPPER_BOUNDS =
      optional(
          128,
          "upper_bounds",
          MapType.ofRequired(129, 130, IntegerType.get(), BinaryType.get()),
          "Map of column id to upper bound");

  Types.NestedField KEY_METADATA =
      optional(131, "key_metadata", BinaryType.get(), "Encryption key metadata blob");
  Types.NestedField SPLIT_OFFSETS =
      optional(
          132, "split_offsets", ListType.ofRequired(133, LongType.get()), "Splittable offsets");
  Types.NestedField EQUALITY_IDS =
      optional(
          135,
          "equality_ids",
          ListType.ofRequired(136, IntegerType.get()),
          "Equality comparison field IDs");
  Types.NestedField SORT_ORDER_ID =
      optional(140, "sort_order_id", IntegerType.get(), "Sort order ID");
  Types.NestedField SPEC_ID = optional(141, "spec_id", IntegerType.get(), "Partition spec ID");

  // The partition field is built per call in getType because its type is supplied by the caller.
  int PARTITION_ID = 102;
  String PARTITION_NAME = "partition";
  String PARTITION_DOC = "Partition data tuple, schema based on the partition spec";

  // NEXT ID TO ASSIGN: 142

  /**
   * Builds the struct type for a data file entry using the given partition tuple type.
   *
   * @param partitionType struct type used for the required {@code partition} field
   * @return a struct containing the fields declared on this interface plus the partition field
   */
  static StructType getType(StructType partitionType) {
    // IDs start at 100 to leave room for changes to ManifestEntry
    Types.NestedField partitionField =
        required(PARTITION_ID, PARTITION_NAME, partitionType, PARTITION_DOC);

    return StructType.of(
        CONTENT,
        FILE_PATH,
        FILE_FORMAT,
        SPEC_ID,
        partitionField,
        RECORD_COUNT,
        FILE_SIZE,
        COLUMN_SIZES,
        VALUE_COUNTS,
        NULL_VALUE_COUNTS,
        NAN_VALUE_COUNTS,
        LOWER_BOUNDS,
        UPPER_BOUNDS,
        KEY_METADATA,
        SPLIT_OFFSETS,
        EQUALITY_IDS,
        SORT_ORDER_ID);
  }

  /**
   * Returns the content stored in the file; one of DATA, POSITION_DELETES, or EQUALITY_DELETES.
   *
   * @return {@code FileContent.DATA} in this default implementation
   */
  @Override
  default FileContent content() {
    return FileContent.DATA;
  }

  /**
   * Returns the equality field IDs for this file.
   *
   * @return always {@code null} in this default implementation
   */
  @Override
  default List<Integer> equalityFieldIds() {
    return null;
  }
}
/** Interface for delete files listed in a table delete manifest. */
public interface DeleteFile extends ContentFile<DeleteFile> {
  /**
   * Returns the recommended split locations for this file.
   *
   * <p>When available, this information is used for planning scan tasks whose boundaries are
   * determined by these offsets. The returned list must be sorted in ascending order.
   *
   * @return list of recommended split locations, if applicable, null otherwise; this default
   *     implementation always returns {@code null}
   */
  @Override
  default List<Long> splitOffsets() {
    return null;
  }
}