Home | About | Sematext search-lucene.com search-hadoop.com
NEW: Monitor These Apps!
elasticsearch, apache solr, apache hbase, hadoop, redis, cassandra, amazon cloudwatch, mysql, memcached, apache kafka, apache zookeeper, apache storm, ubuntu, centOS, red hat, debian, puppet labs, java, senseiDB
 Search Hadoop and all its subprojects:

Switch to Threaded View
Hive >> mail # user >> Errors in one Hive script using LZO compression


Copy link to this message
-
Errors in one Hive script using LZO compression
Hi

I am using LZO compression in our scripts, but one script is still producing errors:

Diagnostic Messages for this Task:
Error: java.io.IOException: java.io.EOFException: Premature EOF from inputStream
        at org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97)
        at org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57)
        at org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:243)
        at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getRecordReader(CombineHiveInputFormat.java:522)
        at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:160)
        at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:381)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:334)
        at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:152)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:396)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
        at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:147)
Caused by: java.io.EOFException: Premature EOF from inputStream
        at com.hadoop.compression.lzo.LzopInputStream.readFully(LzopInputStream.java:75)
        at com.hadoop.compression.lzo.LzopInputStream.readHeader(LzopInputStream.java:114)
        at com.hadoop.compression.lzo.LzopInputStream.<init>(LzopInputStream.java:54)
        at com.hadoop.compression.lzo.LzopCodec.createInputStream(LzopCodec.java:83)
        at org.apache.hadoop.io.SequenceFile$Reader.init(SequenceFile.java:1871)
        at org.apache.hadoop.io.SequenceFile$Reader.initialize(SequenceFile.java:1765)
        at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1714)
        at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1728)
        at org.apache.hadoop.mapred.SequenceFileRecordReader.<init>(SequenceFileRecordReader.java:49)
        at org.apache.hadoop.mapred.SequenceFileInputFormat.getRecordReader(SequenceFileInputFormat.java:64)
        at org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:240)
        ... 9 more
SCRIPT
======
set hiveconf mapred.output.compression.type=BLOCK;
-- Session preamble: compression settings, task counts, and UDF registration
-- for the INSERT OVERWRITE query that follows.

-- Map-output compression: Snappy codec, enabled.
-- NOTE(review): this block mixes an older-style mapred.* property name with
-- mapreduce.* names — confirm the target Hadoop version honours both.
set mapred.map.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
set mapreduce.map.output.compress=true;
-- Hive output compression on; the job output codec is set to LzopCodec.
set hive.exec.compress.output=true;
set mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzopCodec;
set mapreduce.output.fileoutputformat.compress=true;
-- Compress intermediate data as well.
set hive.exec.compress.intermediate=true;
-- Map/reduce task counts and per-tasktracker task maximums.
set mapreduce.job.maps=500;
set mapreduce.job.reduces=8;
set mapreduce.tasktracker.map.tasks.maximum=12;
set mapreduce.tasktracker.reduce.tasks.maximum=8;
-- Load the jar that provides the com.wizecommerce.utils.hive.udf classes
-- (presumably — verify the jar contents) and register them as temporary functions.
add jar /home/nextag/sasubramanian/mycode/impressions/jar/impressions-hiveudfs-1.0-20130615-155038.jar;
-- NOTE(review): collect and processblankkeyword are not referenced in the
-- visible part of the query below; the query is truncated, so confirm before removing.
create temporary function collect  as 'com.wizecommerce.utils.hive.udf.GenericUDAFCollect';
create temporary function isnextagip  as 'com.wizecommerce.utils.hive.udf.IsNextagIP';
create temporary function isfrombot  as 'com.wizecommerce.utils.hive.udf.IsFromBot';
create temporary function processblankkeyword  as 'com.wizecommerce.utils.hive.udf.ProcessBlankKeyword';
create temporary function getValidHiddenSellers as 'com.wizecommerce.utils.hive.udf.GetValidHiddenSellers';
INSERT OVERWRITE DIRECTORY '/user/beeswax/warehouse/keyword_impressions_ptitles_log/2013-03-19'
SELECT
     hp.header_date,
     hp.impression_id,
     hp.header_searchsessionid,
     hp.cached_visit_id,
     split(hp.header_servername,'[\.]')[0],
     hp.cached_ip,
     hp.header_adnode,
     IF (concat_ws(',' , collect_set(concat_ws('|', cast(hp.seller_id as STRING), cast(IF(hp.seller_pricetier IS NULL, -1L, hp.seller_pricetier) as STRING), cast(hp.seller_price as STRING), cast(IF(hp.ptitle_rank IS  NULL, -1L, hp.ptitle_rank) as STRING)))) = '-1|-1',NULL,concat_ws(',' , collect_set(concat_ws('|', cast(hp.seller_id as STRING), cast(IF(hp.seller_pricetier IS NULL, -1L, hp.seller_pricetier) as STRING), cast(hp.seller_price as STRING), cast(IF(hp.ptitle_rank IS  NULL, -1L, hp.ptitle_rank) as STRING))))),
     IF(concat_ws(',' , getValidHiddenSellers(collect_set(concat_ws('|', cast(sh.seller_id as STRING), cast(sh.ptitle_id as STRING), cast(sh.tag_id as STRING), cast(IF(sh.price_tier IS NULL, -1L, sh.price_tier) as STRING))))) = '',NULL, concat_ws(',' , getValidHiddenSellers(collect_set(concat_ws('|', cast(sh.seller_id as STRING), cast(sh.ptitle_id as STRING), cast(sh.tag_id as STRING), cast(IF(sh.price_tier IS NULL, -1L, sh.price_tier) as STRING))))))
FROM
     (SELECT
          h.header_date,
          h.header_servername,
          h.impression_id,
          h.header_searchsessionid,
          h.cached_visit_id,
          h.cached_ip,
          h.header_adnode,
          p.ptitle_ptitleid,
          p.seller_id,
          p.seller_pricetier,
          p.seller_price,
          p.ptitle_rank
     FROM
          (SELECT
               header_date,
               header_servername,
               impression_id,
               header_searchsessionid,
               cached_ip,
               header_adnode,
               cached_recordid,
               cached_visit_id
           FROM
                outpdir_impressions_header
           WHERE
              header_date_partition='2013-03-19'
           AND
              header_rbabsentsellers = 1L
           AND
              cached_recordid IS NOT NULL
           AND
              isnextagip(cached_ip) = FALSE
           AND
              isfrombot(cached_visit_id) = FALSE
          ) h
     LEFT OUTER JOIN
          (SELECT
               po.impression_id,
               po.ptitle_ptitleid,
               po.header_date,
               po.seller_id,
               po.seller_pricetier,
            
NEW: Monitor These Apps!
elasticsearch, apache solr, apache hbase, hadoop, redis, cassandra, amazon cloudwatch, mysql, memcached, apache kafka, apache zookeeper, apache storm, ubuntu, centOS, red hat, debian, puppet labs, java, senseiDB