|
|
Russell Jurney 2012-02-05, 05:39
Why am I getting tuple objects in my Python UDFs? This isn't how the examples work.
Error:
org.apache.pig.backend.executionengine.ExecException: ERROR 0: Error executing function at org.apache.pig.scripting.jython.JythonFunction.exec(JythonFunction.java:106) at org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserFunc.getNext(POUserFunc.java:216) at org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserFunc.getNext(POUserFunc.java:275) at org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator.getNext(PhysicalOperator.java:320) at org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach.processPlan(POForEach.java:332) at org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach.getNext(POForEach.java:284) at org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator.processInput(PhysicalOperator.java:290) at org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLimit.getNext(POLimit.java:85) at org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator.processInput(PhysicalOperator.java:290) at org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLocalRearrange.getNext(POLocalRearrange.java:256) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.runPipeline(PigGenericMapBase.java:267) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:262) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:64) at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144) at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370) at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:212) Caused by: Traceback (most recent call last): File "udfs.py", line 27, in hour return tuple_time.tm_hour AttributeError: 
'tuple' object has no attribute 'tm_hour' udfs.py:
#!/usr/bin/python
import time
def hour(iso_string): tuple_time = time.strptime(iso_string, "%Y-%m-%dT%H:%M:%S") return str(tuple_time.tm_hour) my.pig:
register /me/pig/build/ivy/lib/Pig/avro-1.5.3.jar register /me/pig/build/ivy/lib/Pig/json-simple-1.1.jar register /me/pig/contrib/piggybank/java/piggybank.jar register /me/pig/build/ivy/lib/Pig/jackson-core-asl-1.7.3.jar register /me/pig/build/ivy/lib/Pig/jackson-mapper-asl-1.7.3.jar
define AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage(); define CustomFormatToISO org.apache.pig.piggybank.evaluation.datetime.convert.CustomFormatToISO(); define substr org.apache.pig.piggybank.evaluation.string.SUBSTRING();
register 'udfs.py' using jython as agiledata;
rmf /tmp/sent_distribution.txt
/* Get email address pairs for each type of connection, and union them together */ emails = load '/me/tmp/test_inbox' using AvroStorage();
/* Filter emails according to existence of header pairs, from and [to, cc, bcc] project the pairs (may be more than one to/cc/bcc), then emit them, lowercased. */ filtered = FILTER emails BY (from is not null) and (to is not null) and (date is not null); flat = FOREACH filtered GENERATE flatten(from) as from, flatten(to) as to, agiledata.hour(date) as date; a = limit flat 10; dump a
-- Russell Jurney twitter.com/rjurney [EMAIL PROTECTED] datasyndrome.com
+
Russell Jurney 2012-02-05, 05:39
Aniket Mokashi 2012-02-05, 08:44
Looks like this is a Jython bug.
BTW, AFAIK, the return type of this function would be a bytearray if a decorator is not specified.
Thanks, Aniket
On Sat, Feb 4, 2012 at 9:39 PM, Russell Jurney <[EMAIL PROTECTED]>wrote:
> Why am I having tuple objects in my python udfs? This isn't how the > examples work. > > Error: > > org.apache.pig.backend.executionengine.ExecException: ERROR 0: Error > executing function > at > > org.apache.pig.scripting.jython.JythonFunction.exec(JythonFunction.java:106) > at > > org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserFunc.getNext(POUserFunc.java:216) > at > > org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserFunc.getNext(POUserFunc.java:275) > at > > org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator.getNext(PhysicalOperator.java:320) > at > > org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach.processPlan(POForEach.java:332) > at > > org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach.getNext(POForEach.java:284) > at > > org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator.processInput(PhysicalOperator.java:290) > at > > org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLimit.getNext(POLimit.java:85) > at > > org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator.processInput(PhysicalOperator.java:290) > at > > org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLocalRearrange.getNext(POLocalRearrange.java:256) > at > > org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.runPipeline(PigGenericMapBase.java:267) > at > > org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:262) > at > > org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:64) > at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144) > at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764) > at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370) > at 
org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:212) > Caused by: Traceback (most recent call last): > File "udfs.py", line 27, in hour > return tuple_time.tm_hour > AttributeError: 'tuple' object has no attribute 'tm_hour' > > > udfs.py: > > #!/usr/bin/python > > import time > > def hour(iso_string): > tuple_time = time.strptime(iso_string, "%Y-%m-%dT%H:%M:%S") > return str(tuple_time.tm_hour) > > > my.pig: > > register /me/pig/build/ivy/lib/Pig/avro-1.5.3.jar > register /me/pig/build/ivy/lib/Pig/json-simple-1.1.jar > register /me/pig/contrib/piggybank/java/piggybank.jar > register /me/pig/build/ivy/lib/Pig/jackson-core-asl-1.7.3.jar > register /me/pig/build/ivy/lib/Pig/jackson-mapper-asl-1.7.3.jar > > define AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage(); > define CustomFormatToISO > org.apache.pig.piggybank.evaluation.datetime.convert.CustomFormatToISO(); > define substr org.apache.pig.piggybank.evaluation.string.SUBSTRING(); > > register 'udfs.py' using jython as agiledata; > > rmf /tmp/sent_distribution.txt > > /* Get email address pairs for each type of connection, and union them > together */ > emails = load '/me/tmp/test_inbox' using AvroStorage(); > > /* Filter emails according to existence of header pairs, from and [to, cc, > bcc] > project the pairs (may be more than one to/cc/bcc), then emit them, > lowercased. */ > filtered = FILTER emails BY (from is not null) and (to is not null) and > (date is not null); > flat = FOREACH filtered GENERATE flatten(from) as from, > flatten(to) as to, > agiledata.hour(date) as date; > a = limit flat 10; > dump a > > > > -- > Russell Jurney > twitter.com/rjurney > [EMAIL PROTECTED] > datasyndrome.com
"...:::Aniket:::... Quetzalco@tl"
+
Aniket Mokashi 2012-02-05, 08:44
Daniel Dai 2012-02-06, 05:03
Seems like a bug in Jython:
>>> import time
>>> tuple_time = time.strptime('2006-10-16T08:19:39', "%Y-%m-%dT%H:%M:%S")
>>> tuple_time.tm_hour
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: 'tuple' object has no attribute 'tm_hour'
>>> tuple_time[3]
8
Changing `return str(tuple_time.tm_hour)` to `return str(tuple_time[3])` seems to fix the issue.
Daniel
On Sun, Feb 5, 2012 at 12:44 AM, Aniket Mokashi <[EMAIL PROTECTED]> wrote: > Looks like this is jython bug. > > Btw, afaik, the return type of this function would be a bytearray if > decorator is not specified. > > Thanks, > Aniket > > On Sat, Feb 4, 2012 at 9:39 PM, Russell Jurney <[EMAIL PROTECTED]>wrote: > >> Why am I having tuple objects in my python udfs? This isn't how the >> examples work. >> >> Error: >> >> org.apache.pig.backend.executionengine.ExecException: ERROR 0: Error >> executing function >> at >> >> org.apache.pig.scripting.jython.JythonFunction.exec(JythonFunction.java:106) >> at >> >> org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserFunc.getNext(POUserFunc.java:216) >> at >> >> org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserFunc.getNext(POUserFunc.java:275) >> at >> >> org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator.getNext(PhysicalOperator.java:320) >> at >> >> org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach.processPlan(POForEach.java:332) >> at >> >> org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach.getNext(POForEach.java:284) >> at >> >> org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator.processInput(PhysicalOperator.java:290) >> at >> >> org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLimit.getNext(POLimit.java:85) >> at >> >> org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator.processInput(PhysicalOperator.java:290) >> at >> >> org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLocalRearrange.getNext(POLocalRearrange.java:256) >> at >> >> org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.runPipeline(PigGenericMapBase.java:267) >> at >> >> 
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:262) >> at >> >> org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:64) >> at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144) >> at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764) >> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370) >> at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:212) >> Caused by: Traceback (most recent call last): >> File "udfs.py", line 27, in hour >> return tuple_time.tm_hour >> AttributeError: 'tuple' object has no attribute 'tm_hour' >> >> >> udfs.py: >> >> #!/usr/bin/python >> >> import time >> >> def hour(iso_string): >> tuple_time = time.strptime(iso_string, "%Y-%m-%dT%H:%M:%S") >> return str(tuple_time.tm_hour) >> >> >> my.pig: >> >> register /me/pig/build/ivy/lib/Pig/avro-1.5.3.jar >> register /me/pig/build/ivy/lib/Pig/json-simple-1.1.jar >> register /me/pig/contrib/piggybank/java/piggybank.jar >> register /me/pig/build/ivy/lib/Pig/jackson-core-asl-1.7.3.jar >> register /me/pig/build/ivy/lib/Pig/jackson-mapper-asl-1.7.3.jar >> >> define AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage(); >> define CustomFormatToISO >> org.apache.pig.piggybank.evaluation.datetime.convert.CustomFormatToISO(); >> define substr org.apache.pig.piggybank.evaluation.string.SUBSTRING(); >> >> register 'udfs.py' using jython as agiledata; >> >> rmf /tmp/sent_distribution.txt >> >> /* Get email address pairs for each type of connection, and union them >> together */ >> emails = load '/me/tmp/test_inbox' using AvroStorage();
+
Daniel Dai 2012-02-06, 05:03
|
|