package org.apache.spark.sql.execution.datasources.parquet;

import com.google.common.annotations.VisibleForTesting;
import java.io.IOException;
import java.time.ZoneId;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.page.PageReadStore;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.Type;
import org.apache.spark.SparkUnsupportedOperationException;
import org.apache.spark.memory.MemoryMode;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns;
import org.apache.spark.sql.execution.datasources.parquet.ParquetRowIndexUtil;
import org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase;
import org.apache.spark.sql.execution.vectorized.ColumnVectorUtils;
import org.apache.spark.sql.execution.vectorized.ConstantColumnVector;
import org.apache.spark.sql.execution.vectorized.OffHeapColumnVector;
import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector;
import org.apache.spark.sql.execution.vectorized.WritableColumnVector;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.vectorized.ColumnVector;
import org.apache.spark.sql.vectorized.ColumnarBatch;
import scala.Option;
import scala.jdk.javaapi.CollectionConverters;

/* loaded from: input_file:org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.class */
/**
 * Record reader that decodes Parquet row groups into a reusable {@link ColumnarBatch}.
 *
 * <p>Data can be consumed in two ways:
 * <ul>
 *   <li>row by row (the default): {@link #nextKeyValue()} advances a cursor inside the current
 *       batch and {@link #getCurrentValue()} returns the row under the cursor;</li>
 *   <li>batch by batch, after {@link #enableReturningBatches()} has been called:
 *       {@link #nextKeyValue()} loads a whole new batch and {@link #getCurrentValue()} returns
 *       the {@link ColumnarBatch} itself.</li>
 * </ul>
 *
 * <p>The same {@link ColumnarBatch} instance is reused for every batch; its contents are
 * overwritten by each call to {@link #nextBatch()}.
 */
public class VectorizedParquetRecordReader extends SpecificParquetRecordReaderBase<Object> {
    /** Size used to allocate every column vector; also the maximum number of rows per batch. */
    private int capacity;

    /** Row-mode cursor: number of rows of the current batch already handed out. */
    private int batchIdx;

    /** Number of rows held by the current batch. */
    private int numBatched;

    /** One entry per top-level field of {@code sparkSchema}; populated by {@code initBatch}. */
    private ParquetColumnVector[] columnVectors;

    /** Number of rows read into batches so far (advanced by {@link #nextBatch()}). */
    private long rowsReturned;

    /** Sum of the row counts of all row groups fetched from the underlying reader so far. */
    private long totalCountLoadedSoFar;

    /** Requested, non-required columns whose path does not exist in the file schema. */
    private Set<ParquetColumn> missingColumns;

    /** Forwarded unchanged to every {@link VectorizedColumnReader}; may be {@code null}. */
    private final ZoneId convertTz;

    /** Forwarded unchanged to every {@link VectorizedColumnReader}. */
    private final String datetimeRebaseMode;

    /** Forwarded unchanged to every {@link VectorizedColumnReader}. */
    private final String datetimeRebaseTz;

    /** Forwarded unchanged to every {@link VectorizedColumnReader}. */
    private final String int96RebaseMode;

    /** Forwarded unchanged to every {@link VectorizedColumnReader}. */
    private final String int96RebaseTz;

    /** The reusable output batch; {@code null} until a batch is initialised and after close. */
    private ColumnarBatch columnarBatch;

    /** When {@code true} the reader hands out whole batches instead of single rows. */
    private boolean returnColumnarBatch;

    /** Row-index generator, or {@code null} when none was created for the schema. */
    private ParquetRowIndexUtil.RowIndexGenerator rowIndexGenerator;

    /** Memory mode used by the no-argument / two-argument {@code initBatch} overloads. */
    private final MemoryMode MEMORY_MODE;

    /**
     * Creates a reader with explicit conversion settings.
     *
     * @param zoneId stored as {@code convertTz} and passed to every column reader
     * @param str    stored as {@code datetimeRebaseMode}
     * @param str2   stored as {@code datetimeRebaseTz}
     * @param str3   stored as {@code int96RebaseMode}
     * @param str4   stored as {@code int96RebaseTz}
     * @param z      {@code true} to allocate off-heap column vectors, {@code false} for on-heap
     * @param i      capacity of each column vector, i.e. the maximum rows per batch
     */
    public VectorizedParquetRecordReader(ZoneId zoneId, String str, String str2, String str3, String str4, boolean z, int i) {
        this.batchIdx = 0;
        this.numBatched = 0;
        this.totalCountLoadedSoFar = 0L;
        this.rowIndexGenerator = null;
        this.convertTz = zoneId;
        this.datetimeRebaseMode = str;
        this.datetimeRebaseTz = str2;
        this.int96RebaseMode = str3;
        this.int96RebaseTz = str4;
        this.MEMORY_MODE = z ? MemoryMode.OFF_HEAP : MemoryMode.ON_HEAP;
        this.capacity = i;
    }

    /**
     * Creates a reader with default conversion settings: no {@code convertTz}, datetime rebase
     * mode {@code "CORRECTED"} in {@code "UTC"}, INT96 rebase mode {@code "LEGACY"} in the JVM's
     * default time zone.
     *
     * @param z {@code true} to allocate off-heap column vectors, {@code false} for on-heap
     * @param i capacity of each column vector, i.e. the maximum rows per batch
     */
    public VectorizedParquetRecordReader(boolean z, int i) {
        this(null, "CORRECTED", "UTC", "LEGACY", ZoneId.systemDefault().getId(), z, i);
    }

    /**
     * Delegates to the base class initialisation and then validates the requested columns
     * against the file schema.
     */
    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException, UnsupportedOperationException {
        super.initialize(inputSplit, taskAttemptContext);
        initializeInternal();
    }

    /**
     * Same as {@link #initialize(InputSplit, TaskAttemptContext)} but additionally forwards the
     * optional {@link ParquetMetadata} to the base class.
     */
    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext, Option<ParquetMetadata> option) throws IOException, InterruptedException, UnsupportedOperationException {
        super.initialize(inputSplit, taskAttemptContext, option);
        initializeInternal();
    }

    /**
     * Delegates to the base class {@code initialize(String, List)} overload and then validates
     * the requested columns against the file schema.
     */
    @Override
    public void initialize(String str, List<String> list) throws IOException, UnsupportedOperationException {
        super.initialize(str, list);
        initializeInternal();
    }

    /**
     * Test-only entry point: delegates to the base class overload taking explicit schemas and a
     * row-group reader, then validates the requested columns against the file schema.
     */
    @Override
    @VisibleForTesting
    public void initialize(MessageType messageType, MessageType messageType2, SpecificParquetRecordReaderBase.ParquetRowGroupReader parquetRowGroupReader, int i) throws IOException {
        super.initialize(messageType, messageType2, parquetRowGroupReader, i);
        initializeInternal();
    }

    /**
     * Closes the output batch (if one was created) and then the base reader.
     *
     * <p>The base reader is closed even when closing the batch throws, so that a failure while
     * releasing the vectors cannot leak the underlying reader.
     */
    @Override
    public void close() throws IOException {
        try {
            ColumnarBatch batch = this.columnarBatch;
            if (batch != null) {
                // Clear the field first so a repeated close() never closes the batch twice.
                this.columnarBatch = null;
                batch.close();
            }
        } finally {
            super.close();
        }
    }

    /**
     * Advances to the next value.
     *
     * <p>In batch mode this loads the next batch. In row mode it moves the cursor one row
     * forward, loading a new batch first when the current one is exhausted.
     *
     * @return {@code true} if a value is available, {@code false} when all rows were consumed
     */
    public boolean nextKeyValue() throws IOException {
        // Makes sure the batch and the column vectors exist before they are used below.
        resultBatch();
        if (this.returnColumnarBatch) {
            return nextBatch();
        }
        boolean currentBatchExhausted = this.batchIdx >= this.numBatched;
        if (currentBatchExhausted && !nextBatch()) {
            return false;
        }
        this.batchIdx++;
        return true;
    }

    /**
     * Returns the current value: the whole {@link ColumnarBatch} in batch mode, otherwise the
     * row at the position the cursor was moved to by the last {@link #nextKeyValue()} call.
     */
    public Object getCurrentValue() {
        if (this.returnColumnarBatch) {
            return this.columnarBatch;
        }
        // batchIdx was already incremented by nextKeyValue(), hence the -1.
        return this.columnarBatch.getRow(this.batchIdx - 1);
    }

    /**
     * Returns the fraction of rows read so far, {@code rowsReturned / totalRowCount}.
     *
     * <p>Note: when {@code totalRowCount} is 0 and no rows have been returned, the float
     * division yields NaN.
     */
    public float getProgress() {
        return ((float) this.rowsReturned) / ((float) this.totalRowCount);
    }

    /**
     * Allocates the column vectors and builds the output batch.
     *
     * <p>The batch schema is {@code sparkSchema} followed by the fields of
     * {@code constantColumns} (if any). The trailing fields are backed by
     * {@link ConstantColumnVector}s populated once from {@code constantValues}.
     *
     * @param memoryMode      where to allocate the data vectors
     * @param constantColumns extra fields appended to the batch, or {@code null} for none
     * @param constantValues  row supplying the values of {@code constantColumns}
     */
    private void initBatch(MemoryMode memoryMode, StructType constantColumns, InternalRow constantValues) {
        StructType batchSchema = new StructType(this.sparkSchema.fields());
        int constantColumnCount = 0;
        if (constantColumns != null) {
            for (StructField field : constantColumns.fields()) {
                batchSchema = batchSchema.add(field);
            }
            constantColumnCount = constantColumns.fields().length;
        }

        ColumnVector[] vectors = allocateColumns(this.capacity, batchSchema, memoryMode == MemoryMode.OFF_HEAP, constantColumnCount);
        this.columnarBatch = new ColumnarBatch(vectors);

        int dataColumnCount = this.sparkSchema.fields().length;
        this.columnVectors = new ParquetColumnVector[dataColumnCount];

        // The default values depend only on the schema, so compute them once instead of once
        // per column. Skipped entirely when there is no column to apply them to.
        Object[] existenceDefaults = null;
        if (this.sparkRequestedSchema != null && dataColumnCount > 0) {
            existenceDefaults = ResolveDefaultColumns.existenceDefaultValues(this.sparkRequestedSchema);
        }
        for (int col = 0; col < dataColumnCount; col++) {
            Object defaultValue = existenceDefaults == null ? null : existenceDefaults[col];
            this.columnVectors[col] = new ParquetColumnVector(this.parquetColumn.children().apply(col), (WritableColumnVector) vectors[col], this.capacity, memoryMode, this.missingColumns, true, defaultValue);
        }

        if (constantColumns != null) {
            // Constant vectors sit right after the data vectors in the batch.
            for (int col = 0; col < constantColumns.fields().length; col++) {
                ColumnVectorUtils.populate((ConstantColumnVector) vectors[col + dataColumnCount], constantValues, col);
            }
        }
        this.rowIndexGenerator = ParquetRowIndexUtil.createGeneratorIfNeeded(this.sparkSchema);
    }

    /** Initialises the batch in the configured memory mode without any constant columns. */
    private void initBatch() {
        initBatch(this.MEMORY_MODE, null, null);
    }

    /**
     * Initialises the batch in the configured memory mode, appending the fields of
     * {@code structType} as constant columns whose values are taken from {@code internalRow}.
     *
     * @param structType  extra fields appended after the data columns, or {@code null}
     * @param internalRow row supplying one value per field of {@code structType}
     */
    public void initBatch(StructType structType, InternalRow internalRow) {
        initBatch(this.MEMORY_MODE, structType, internalRow);
    }

    /**
     * Returns the reusable output batch, creating it (without constant columns) on first use.
     */
    public ColumnarBatch resultBatch() {
        if (this.columnarBatch == null) {
            initBatch();
        }
        return this.columnarBatch;
    }

    /** Switches the reader to batch mode: values become whole {@link ColumnarBatch}es. */
    public void enableReturningBatches() {
        this.returnColumnarBatch = true;
    }

    /**
     * Loads the next batch of rows into the output batch.
     *
     * <p>The batch must have been initialised (see {@link #resultBatch()} /
     * {@link #initBatch(StructType, InternalRow)}) before this is called.
     *
     * @return {@code false} when every row has already been returned, {@code true} otherwise
     */
    public boolean nextBatch() throws IOException {
        ParquetColumnVector[] vectors = this.columnVectors;
        for (ParquetColumnVector vector : vectors) {
            vector.reset();
        }
        this.columnarBatch.setNumRows(0);
        if (this.rowsReturned >= this.totalRowCount) {
            return false;
        }
        checkEndOfRowGroup();

        // Never read across a row-group boundary: cap by what is left in the loaded row groups.
        long remainingInRowGroup = this.totalCountLoadedSoFar - this.rowsReturned;
        int rowsToRead = (int) Math.min(this.capacity, remainingInRowGroup);

        for (ParquetColumnVector vector : vectors) {
            for (ParquetColumnVector leaf : vector.getLeaves()) {
                VectorizedColumnReader leafReader = leaf.getColumnReader();
                // Leaves without a reader (none was set by initColumnReader) are skipped.
                if (leafReader != null) {
                    leafReader.readBatch(rowsToRead, leaf.getValueVector(), leaf.getRepetitionLevelVector(), leaf.getDefinitionLevelVector());
                }
            }
            vector.assemble();
        }
        if (this.rowIndexGenerator != null) {
            this.rowIndexGenerator.populateRowIndex(vectors, rowsToRead);
        }

        this.rowsReturned += rowsToRead;
        this.columnarBatch.setNumRows(rowsToRead);
        this.numBatched = rowsToRead;
        this.batchIdx = 0;
        return true;
    }

    /**
     * Resets the set of missing columns and validates every requested top-level column (and,
     * recursively, its children) against the file schema.
     */
    private void initializeInternal() throws IOException, UnsupportedOperationException {
        this.missingColumns = new HashSet<>();
        for (ParquetColumn column : CollectionConverters.asJava(this.parquetColumn.children())) {
            checkColumn(column);
        }
    }

    /**
     * Validates one requested column against the file schema.
     *
     * <ul>
     *   <li>Path absent from the file: recorded as missing, or rejected if the column is
     *       required.</li>
     *   <li>Primitive column present in the file: its descriptor must equal the file's.</li>
     *   <li>Group column present in the file: each child is checked recursively.</li>
     * </ul>
     *
     * @throws IOException if a required column is missing from the file
     * @throws SparkUnsupportedOperationException if a primitive column's descriptor differs
     *         from the one in the file schema
     */
    private void checkColumn(ParquetColumn column) throws IOException {
        String[] path = CollectionConverters.asJava(column.path()).toArray(new String[0]);
        if (!containsPath(this.fileSchema, path)) {
            if (column.required()) {
                throw new IOException("Required column is missing in data file. Col: " + Arrays.toString(path));
            }
            this.missingColumns.add(column);
            return;
        }
        if (column.isPrimitive()) {
            ColumnDescriptor requested = column.descriptor().get();
            ColumnDescriptor inFile = this.fileSchema.getColumnDescription(requested.getPath());
            if (!inFile.equals(requested)) {
                throw new SparkUnsupportedOperationException("_LEGACY_ERROR_TEMP_3185");
            }
            return;
        }
        for (ParquetColumn child : CollectionConverters.asJava(column.children())) {
            checkColumn(child);
        }
    }

    /** Returns whether {@code path} can be resolved field by field starting at {@code type}. */
    private boolean containsPath(Type type, String[] path) {
        return containsPath(type, path, 0);
    }

    /**
     * Returns whether {@code path[depth..]} can be resolved field by field starting at
     * {@code type}. An exhausted path always matches; a non-group type cannot have children.
     */
    private boolean containsPath(Type type, String[] path, int depth) {
        if (path.length == depth) {
            return true;
        }
        if (!(type instanceof GroupType)) {
            return false;
        }
        GroupType group = (GroupType) type;
        String fieldName = path[depth];
        return group.containsField(fieldName) && containsPath(group.getType(fieldName), path, depth + 1);
    }

    /**
     * Fetches the next row group when every row of the row groups loaded so far has been
     * returned, and wires fresh column readers to it. Does nothing otherwise.
     *
     * @throws IOException if the underlying reader has no more row groups although more rows
     *         were expected
     */
    private void checkEndOfRowGroup() throws IOException {
        if (this.rowsReturned != this.totalCountLoadedSoFar) {
            return;
        }
        PageReadStore rowGroup = this.reader.readNextRowGroup();
        if (rowGroup == null) {
            throw new IOException("expecting more rows but reached last block. Read "
                + this.rowsReturned + " out of " + this.totalRowCount);
        }
        if (this.rowIndexGenerator != null) {
            this.rowIndexGenerator.initFromPageReadStore(rowGroup);
        }
        for (ParquetColumnVector vector : this.columnVectors) {
            initColumnReader(rowGroup, vector);
        }
        this.totalCountLoadedSoFar += rowGroup.getRowCount();
    }

    /**
     * Attaches a {@link VectorizedColumnReader} for the given row group to every primitive
     * column reachable from {@code vector}. Missing columns (and their subtrees) are skipped.
     */
    private void initColumnReader(PageReadStore rowGroup, ParquetColumnVector vector) throws IOException {
        if (this.missingColumns.contains(vector.getColumn())) {
            return;
        }
        if (vector.getColumn().isPrimitive()) {
            ParquetColumn column = vector.getColumn();
            vector.setColumnReader(new VectorizedColumnReader(column.descriptor().get(), column.required(), rowGroup, this.convertTz, this.datetimeRebaseMode, this.datetimeRebaseTz, this.int96RebaseMode, this.int96RebaseTz, this.writerVersion));
            return;
        }
        for (ParquetColumnVector child : vector.getChildren()) {
            initColumnReader(rowGroup, child);
        }
    }

    /**
     * Allocates one vector per field of {@code schema}.
     *
     * <p>The last {@code constantColumnLength} fields get {@link ConstantColumnVector}s; all
     * preceding fields get on-heap or off-heap writable vectors depending on
     * {@code useOffHeap}.
     *
     * @param capacity             size passed to every vector constructor
     * @param schema               fields to allocate vectors for
     * @param useOffHeap           {@code true} for off-heap data vectors, else on-heap
     * @param constantColumnLength number of trailing fields backed by constant vectors
     * @return the vectors, in field order
     */
    private ColumnVector[] allocateColumns(int capacity, StructType schema, boolean useOffHeap, int constantColumnLength) {
        StructField[] fields = schema.fields();
        int fieldCount = fields.length;
        int dataColumnCount = fieldCount - constantColumnLength;
        ColumnVector[] vectors = new ColumnVector[fieldCount];
        for (int col = 0; col < dataColumnCount; col++) {
            if (useOffHeap) {
                vectors[col] = new OffHeapColumnVector(capacity, fields[col].dataType());
            } else {
                vectors[col] = new OnHeapColumnVector(capacity, fields[col].dataType());
            }
        }
        for (int col = dataColumnCount; col < fieldCount; col++) {
            vectors[col] = new ConstantColumnVector(capacity, fields[col].dataType());
        }
        return vectors;
    }
}
