Home | About | Sematext search-lucene.com search-hadoop.com
 Search Hadoop and all its subprojects:

Switch to Threaded View
Drill >> mail # dev >> [7/7] git commit: Drill-418: Fixes for Parquet nullable reader bug.


Copy link to this message
-
[7/7] git commit: Drill-418: Fixes for Parquet nullable reader bug.
Drill-418: Fixes for Parquet nullable reader bug.
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/7ad6de69
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/7ad6de69
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/7ad6de69

Branch: refs/heads/master
Commit: 7ad6de699303f5ba60f0d4e4034b8bbd7edd15c4
Parents: e753ade
Author: Jason Altekruse <[EMAIL PROTECTED]>
Authored: Mon Mar 17 09:12:19 2014 -0700
Committer: Jacques Nadeau <[EMAIL PROTECTED]>
Committed: Mon Mar 17 09:12:19 2014 -0700

 .../drill/exec/store/parquet/ColumnReader.java     |  2 +-
 .../exec/store/parquet/NullableColumnReader.java   |  4 ++--
 .../store/parquet/ParquetRecordReaderTest.java     | 17 ++++++++++++-----
 .../exec/store/parquet/TestFileGenerator.java      | 12 ++++++++++++
 pom.xml                                            |  4 ++--
 5 files changed, 29 insertions(+), 10 deletions(-)
http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/7ad6de69/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ColumnReader.java
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ColumnReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ColumnReader.java
index 2cc126c..d5c88ef 100644
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ColumnReader.java
@@ -46,7 +46,7 @@ abstract class ColumnReader {
   final boolean isFixedLength;
 
   // counter for the total number of values read from one or more pages
-  // when a batch is filled all of these values should be the same for each column
+  // when a batch is filled all of these values should be the same for all of the columns
   int totalValuesRead;
  
   // counter for the values that have been read in this pass (a single call to the next() method)

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/7ad6de69/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/NullableColumnReader.java
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/NullableColumnReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/NullableColumnReader.java
index 4c33aeb..9be8266 100644
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/NullableColumnReader.java
@@ -63,7 +63,7 @@ abstract class NullableColumnReader extends ColumnReader{
       long runStart = pageReadStatus.readPosInBytes;
       int runLength = 0;
       int currentDefinitionLevel = 0;
-      int currentValueIndexInVector = totalValuesRead;
+      int currentValueIndexInVector = (int) recordsReadInThisIteration;
       boolean lastValueWasNull = true;
       int definitionLevelsRead;
       // loop to find the longest run of defined values available, can be preceded by several nulls
@@ -77,7 +77,7 @@ abstract class NullableColumnReader extends ColumnReader{
         }
         while(currentValueIndexInVector - totalValuesRead < recordsToReadInThisPass
             && currentValueIndexInVector < valueVecHolder.getValueVector().getValueCapacity()
-            && definitionLevelsRead < pageReadStatus.currentPage.getValueCount()){
+            && pageReadStatus.valuesRead + definitionLevelsRead < pageReadStatus.currentPage.getValueCount()){
           currentDefinitionLevel = pageReadStatus.definitionLevels.readInteger();
           definitionLevelsRead++;
           if ( currentDefinitionLevel < columnDescriptor.getMaxDefinitionLevel()){

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/7ad6de69/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
index 2b043aa..f6a8aa4 100644
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
@@ -78,7 +78,6 @@ public class ParquetRecordReaderTest {
   static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ParquetRecordReaderTest.class);
 
   static boolean VERBOSE_DEBUG = false;
-  private boolean checkValues = true;
 
   static final int numberRowGroups = 1;
   static final int recordsPerRowGroup = 300;
@@ -163,8 +162,6 @@ public class ParquetRecordReaderTest {
 
     RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
 
-    checkValues = false;
-
     DrillConfig config = DrillConfig.create();
 
     try(Drillbit bit1 = new Drillbit(config, serviceSet); DrillClient client = new DrillClient(config, serviceSet.getCoordinator());){
@@ -203,8 +200,6 @@ public class ParquetRecordReaderTest {
 
     DrillConfig config = DrillConfig.create();
 
-    checkValues = false;
-
     try(DrillClient client = new DrillClient(config);){
       client.connect();
       RecordBatchLoader batchLoader = new RecordBatchLoader(client.getAllocator());
@@ -327,6 +322,18 @@ public class ParquetRecordReaderTest {
         "/tmp/test.parquet", i, props);
   }
 
+
+  @Ignore
+  @Test
+  public void testReadBug_Drill_418() throws Exception {
+    HashMap<String, FieldInfo> fields = new HashMap<>();
+    ParquetTestProperties props = new ParquetTestProperties(5, 300000, DEFAULT_BYTES_PER_PAGE, fields);
+    TestFileGenerator.populateDrill_418_fields(props);
+    String readEntries = "\"/tmp/customer.plain.parquet\"";
+    testParquetFullEngineEventBased(false, false, "/parquet/parquet_scan_screen_read_entry_replace.json", readEntries,
+        "unused, no file is generated", 1, props, true);
+  }
+
   // requires binary file generated by pig from TPCH data, also have to disable assert where data is coming in
   @Ignore
   @Test

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/7ad6de69/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestFileGenerator.java
di