Home | About | Sematext search-lucene.com search-hadoop.com
 Search Hadoop and all its subprojects:

Switch to Threaded View
Drill, mail # dev - [1/2] git commit: DRILL 211 - index out of bounds error in parquet reader.


Copy link to this message
-
[1/2] git commit: DRILL 211 - index out of bounds error in parquet reader.
jacques@... 2013-09-06, 06:04
Updated Branches:
  refs/heads/master 98bc9e19c -> fef22041b
DRILL 211 - index out of bounds error in parquet reader.
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/60e2080f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/60e2080f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/60e2080f

Branch: refs/heads/master
Commit: 60e2080fa557cddfe8146a706612444724efe716
Parents: 98bc9e1
Author: Jason Altekruse <[EMAIL PROTECTED]>
Authored: Fri Sep 6 00:53:50 2013 -0500
Committer: Jacques Nadeau <[EMAIL PROTECTED]>
Committed: Thu Sep 5 23:01:15 2013 -0700

----------------------------------------------------------------------
 .../exec/store/parquet/VarLenBinaryReader.java  |  5 --
 .../exec/store/ParquetRecordReaderTest.java     | 49 +++++++++++++++++++-
 2 files changed, 48 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/60e2080f/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/VarLenBinaryReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/VarLenBinaryReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/VarLenBinaryReader.java
index f20a2f3..3286314 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/VarLenBinaryReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/VarLenBinaryReader.java
@@ -116,7 +116,6 @@ public class VarLenBinaryReader {
         columnReader.dataTypeLengthInBits = BytesUtils.readIntLittleEndian(bytes,
             (int) columnReader.pageReadStatus.readPosInBytes);
         lengthVarFieldsInCurrentRecord += columnReader.dataTypeLengthInBits;
-
       }
       for (NullableVarLengthColumn columnReader : nullableColumns) {
         if (columnReader.pageReadStatus.currentPage == null
@@ -162,10 +161,6 @@ public class VarLenBinaryReader {
         columnReader.pageReadStatus.valuesRead++;
         columnReader.valuesReadInCurrentPass++;
         currVec.getMutator().setValueCount((int)recordsReadInCurrentPass);
-        // reached the end of a page
-        if ( columnReader.pageReadStatus.valuesRead == columnReader.pageReadStatus.currentPage.getValueCount()) {
-          columnReader.pageReadStatus.next();
-        }
       }
       for (NullableVarLengthColumn columnReader : nullableColumns) {
         bytes = columnReader.pageReadStatus.pageDataByteArray;

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/60e2080f/exec/java-exec/src/test/java/org/apache/drill/exec/store/ParquetRecordReaderTest.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/ParquetRecordReaderTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/ParquetRecordReaderTest.java
index cf790ac..93f1f73 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/ParquetRecordReaderTest.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/ParquetRecordReaderTest.java
@@ -40,7 +40,6 @@ import org.apache.drill.exec.server.Drillbit;
 import org.apache.drill.exec.server.RemoteServiceSet;
 
 import org.apache.drill.exec.store.json.JsonSchemaProvider;
-import org.apache.drill.exec.store.parquet.ParquetStorageEngine;
 import org.apache.drill.exec.vector.BaseDataValueVector;
 import org.apache.drill.exec.vector.ValueVector;
 import org.apache.hadoop.conf.Configuration;
@@ -93,6 +92,29 @@ public class ParquetRecordReaderTest {
     props.fields.put("bin2", new FieldInfo("binary", "bin2", -1, bin2Vals, TypeProtos.MinorType.VARBINARY, props));
   }
 
+  private void populatePigTPCHCustomerFields(ParquetTestProperties props){
+    // None of the data passed to the FieldInfo constructors matters, because the file is generated outside the test
+    props.fields.put("C_CUSTKEY", new FieldInfo("int32", "integer", 32, intVals, TypeProtos.MinorType.INT, props));
+    props.fields.put("C_NATIONKEY", new FieldInfo("int64", "bigInt", 64, longVals, TypeProtos.MinorType.BIGINT, props));
+    props.fields.put("C_ACCTBAL", new FieldInfo("float", "f", 32, floatVals, TypeProtos.MinorType.FLOAT4, props));
+    props.fields.put("C_NAME", new FieldInfo("double", "d", 64, doubleVals, TypeProtos.MinorType.FLOAT8, props));
+    props.fields.put("C_ADDRESS", new FieldInfo("boolean", "b", 1, boolVals, TypeProtos.MinorType.BIT, props));
+    props.fields.put("C_PHONE", new FieldInfo("binary", "bin", -1, binVals, TypeProtos.MinorType.VARBINARY, props));
+    props.fields.put("C_MKTSEGMENT", new FieldInfo("binary", "bin2", -1, bin2Vals, TypeProtos.MinorType.VARBINARY, props));
+    props.fields.put("C_COMMENT", new FieldInfo("binary", "bin2", -1, bin2Vals, TypeProtos.MinorType.VARBINARY, props));
+  }
+
+  private void populatePigTPCHSupplierFields(ParquetTestProperties props){
+    // None of the data passed to the FieldInfo constructors matters, because the file is generated outside the test
+    props.fields.put("S_SUPPKEY", new FieldInfo("int32", "integer", 32, intVals, TypeProtos.MinorType.INT, props));
+    props.fields.put("S_NATIONKEY", new FieldInfo("int64", "bigInt", 64, longVals, TypeProtos.MinorType.BIGINT, props));
+    props.fields.put("S_ACCTBAL", new FieldInfo("float", "f", 32, floatVals, TypeProtos.MinorType.FLOAT4, props));
+    props.fields.put("S_NAME", new FieldInfo("double", "d", 64, doubleVals, TypeProtos.MinorType.FLOAT8, props));
+    props.fields.put("S_ADDRESS", new FieldInfo("boolean", "b", 1, boolVals, TypeProtos.MinorType.BIT, props));
+    props.fields.put("S_PHONE", new FieldInfo("binary", "bin", -1, binVals, TypeProtos.MinorType.VARBINARY, props));
+    props.fields.put("S_COMMENT", new FieldInfo("binary", "bin2", -1, bin2Vals,