Home | About | Sematext search-lucene.com search-hadoop.com
 Search Hadoop and all its subprojects:

Switch to Threaded View
Sqoop >> mail # user >> Sqoop Import Issue with split-by column


Copy link to this message
-
Sqoop Import Issue with split-by column
All ,

Am trying to run a free form SQL from  Sqoop to import data from Oracle 10g , every time  I keep getting  the split-by column mentioned is  invalid . Can you please help figure what am I missing .
Sqoop version : 1.4.3
Oracle 10g
Ojdbc6.jar

Error

14/03/14 08:55:16 INFO mapred.JobClient: Task Id : attempt_201403100830_0118_m_000009_0, Status : FAILED
java.io.IOException: SQLException in nextKeyValue
        at org.apache.sqoop.mapreduce.db.DBRecordReader.nextKeyValue(DBRecordReader.java:265)
        at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.nextKeyValue(MapTask.java:530)
        at org.apache.hadoop.mapreduce.MapContext.nextKeyValue(MapContext.java:67)
        at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
        at org.apache.sqoop.mapreduce.AutoProgressMapper.run(AutoProgressMapper.java:64)
        at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:363)
        at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:396)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1232)
        at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: java.sql.SQLSyntaxErrorException: ORA-00904: "SI_CUSTOMER_XREF2"."CUSTOMER_ID": invalid identifier

QUERY

  sqoop import  --connect  <<connection parameter>>   -query

"
  SELECT
  SI_CUSTOMER_XREF2.GESC_CLIENT_NAME,
  SI_CUSTOMER_XREF2.GESC_CLIENT_ID,
  SI_F_L0_TRADE.DATA_SRC_SYS_CD,
  SI_F_L0_TRADE.SI_TRADE_ID,
  SI_CUSTOMER_XREF2.CUSTOMER_ID,

****
****

FROM
  (
  SELECT * FROM SI_CUSTOMER_XREF2 WHERE  RUN_ID='209'
  and \$CONDITIONS
  )  SI_CUSTOMER_XREF2 join
  (
  SELECT  T.*
FROM    SI_F_L0_TRADE T
WHERE   T.REVENUE_DATE  >=  '01-JAN-2014'  and \$CONDITIONS
    AND
   T.ORG_DC_SNAPSHOT_ID  = (SELECT D.DC_SNAPSHOT_ID FROM DC_DATE_CARD D WHERE  D.RUN_ID = '209'  and \$CONDITIONS
   )
  )  SI_F_L0_TRADE  ON ( SI_F_L0_TRADE.CUSTOMER_SGK=SI_CUSTOMER_XREF2.CUSTOMER_SGK  )
  join
  (
  SELECT * FROM SI_F_L0_TRADE_GROUP_ID G where RUN_ID='209'  and \$CONDITIONS
   AND REVENUE_DATE  >=  '01-JAN-2014'
  AND G.ORG_DC_SNAPSHOT_ID = (SELECT D.DC_SNAPSHOT_ID FROM DC_DATE_CARD D WHERE  D.RUN_ID = '209'  and \$CONDITIONS
  )
  )  SI_F_L0_TRADE_GROUP_ID ON ( SI_F_L0_TRADE.TRADE_SGK=SI_F_L0_TRADE_GROUP_ID.TRADE_SGK  )
   join
  (  SELECT * FROM cpl_trade_instrument_ref WHERE  RUN_ID= '209' -- and \$CONDITIONS
  )  CPL_INSTRUMENT_REF ON  ( CPL_INSTRUMENT_REF.INSTRUMENT_SGK=SI_F_L0_TRADE.INSTRUMENT_SGK  )
  JOIN
  (
  SELECT * FROM CPL_INSTRUMENT_REF_EXT WHERE  RUN_ID='209'  and \$CONDITIONS
  )  CPL_INSTRUMENT_REF_EXT ON    ( CPL_INSTRUMENT_REF.INSTRUMENT_SGK=CPL_INSTRUMENT_REF_EXT.INSTRUMENT_SGK)
  join
  (
  SELECT * FROM SI_BUSINESS_REGION WHERE RUN_ID='209'   and \$CONDITIONS
  )  ISSUER_BUSINESS_REGION ON  ( ISSUER_BUSINESS_REGION.ISO_CNTRY_3_CHAR_CD=CPL_INSTRUMENT_REF_EXT.ISSUER_COUNTRY_CD  )
    join
    (
  SELECT (product_line_cd ||
                    CASE WHEN nvl(new_issue_ind, '0') = '1' THEN 'P' ELSE 'S' END || '_' ||
                              nvl(business_line_cd, 'N') || '_' ||
                              nvl(trade_type_cd, 'N') || '_' ||
                              nvl(region_cd, 'N'                               )) product_cd, SI_GROUP_ID_XREF.* FROM SI_GROUP_ID_XREF
WHERE RUN_ID  = '209'  and \$CONDITIONS
  )  SI_GROUP_ID_XREF ON   ( SI_F_L0_TRADE_GROUP_ID.GROUP_ID=SI_GROUP_ID_XREF.GROUP_ID  )
  JOIN
  (
  SELECT * FROM SI_PRODUCT_LINE WHERE  RUN_ID= '209'  and \$CONDITIONS
  )  SI_PRODUCT_LINE ON       ( SI_GROUP_ID_XREF.PRODUCT_LINE_CD=SI_PRODUCT_LINE.PRODUCT_LINE_L6_SGK  )
    join
  (
  SELECT * FROM SI_BUSINESS_REGION WHERE  RUN_ID= '209'  and \$CONDITIONS
  )  SI_BUSINESS_REGION ON    ( SI_GROUP_ID_XREF.REGION_CD=SI_BUSINESS_REGION.REGION_L6_SGK  )
  JOIN
  (
  SELECT * FROM si_salesrep_xref WHERE run_id = '209'  and \$CONDITIONS
  )  MARKETER     ON  ( MARKETER.SRC_SALESREP_ID=SI_F_L0_TRADE.SRC_SALES_REP_ID)
   join
  (
  SELECT * FROM CDW_PRESENTATION.CPL_EMPLOYEE_REF WHERE run_id ='209'  and \$CONDITIONS
  )  MARKETER_EMPLOYEE_REF ON  ( MARKETER.CS_EMPLOYEE_ID=MARKETER_EMPLOYEE_REF.EMPLOYEE_ID  )
   join
  (
  SELECT * FROM SI_BOOK WHERE  RUN_ID= '209'  and \$CONDITIONS
  )  SI_BOOK  ON   ( SI_BOOK.BOOK_SGK=SI_F_L0_TRADE.BOOK_SGK  )

   join
  (SELECT * FROM cpl_sfo_country_xref ) DOMICILE_SFO_CNTRY_XREF     ON  ( SI_CUSTOMER_XREF2.DOMICILE_CNTRY_ISO_CD=DOMICILE_SFO_CNTRY_XREF.CNTRY_ISO_CD)

  join
  (
  SELECT * FROM CPL_CLIENT_REF WHERE  RUN_ID='209'   and \$CONDITIONS
   AND SRC_SYS_CD = 'GESC'   )  CPL_CLIENT_REF
ON       ( SI_CUSTOMER_XREF2.GESC_CLIENT_ID=CPL_CLIENT_REF.CLIENT_CD  )
JOIN
  (
  select * from si_client_type where run_id = '209'  and \$CONDITIONS
  )  CLIENT_TYPE      ON  ( CPL_CLIENT_REF.CLIENT_SGK=CLIENT_TYPE.CLIENT_SGK  )

   join

  (
  select * from si_client_types where run_id = '209'  and \$CONDITIONS
  )  CLIENT_TYPES     ON  ( CLIENT_TYPE.CLIENT_TYPES_SGK=CLIENT_TYPES.CLIENT_TYPES_SGK  )

    WHERE  MARKETER.SRC_SYS_CD=SI_F_L0_TRADE.SREP_SRC_SYS_CD  and \$CONDITIONS
    AND
  (
   ( ( SI_F_L0_TRADE.IGNORE_REASON_CD ) IS NULL
  AND ( SI_F_L0_TRADE_GROUP_ID.GROUP_ID ) IS NOT NULL
  AND ( SI_F_L0_TRADE_GROUP_ID.GROUP_ID ) >0
  AND SI_GROUP_ID_XREF.Can_Double_Count = 0
  AND SI_GROUP_ID_XREF.Product_Line_Cd IS NOT NULL
  AND SI_GROUP_ID_XREF.LOSS_IND >=0
  )
  AND
   SI_PRODUCT_LINE.PRODUCT_LINE_L1_NAME  =  'All (Equities)'   )
GROUP BY
  SI_CUSTOMER_XREF2.GESC_CLIENT_NAME,
  SI_CUSTOMER_XREF2.GESC_CLIENT_ID,
  SI_F_L0_TRADE.DATA_SRC_SYS_CD,
  SI_F_L0_TRADE.SI_TRADE_ID,
  SI_CUSTOMER_XREF2.CUSTOMER_ID,
****
***
" --split-by  SI_CUSTOMER_XREF2.customer_id --boundary-query "select min(CUSTOMER_ID),max(CUSTOMER_ID) from  SI_CUSTOMER_XREF2 WHERE  RUN_ID='209'" -m 48 --fields-terminated-by "|" --target-dir "/Sqoop