Hey Felix,
See the test_append() function at
http://svn.apache.org/viewvc/avro/trunk/lang/py/test/test_datafile.py?view=markup.
Regards,
Jeff
On Wed, Dec 22, 2010 at 4:59 PM, felix gao <[EMAIL PROTECTED]> wrote:
> Hi all,
>
> I am having trouble appending more data to an existing file.
>
> Environment: Python 2.6.5, avro-1.3.3-py2.6
>
> The program looks like this:
>
> from avro import schema, datafile, io
>
> OUTFILE_NAME = 'sample.avro'
>
> SCHEMA_STR = """{
> "type": "record",
> "name": "bkSampleAvro",
> "namespace": "bk_avro_example",
> "fields": [
> { "name": "name" , "type": "string" },
> { "name": "age" , "type": "int" },
> { "name": "address", "type": "string" },
> { "name": "value" , "type": "long" }
> ]
> }"""
>
> SCHEMA = schema.parse(SCHEMA_STR)
> def write_avro_file():
> # Let's generate our data
> data = {}
> data['name'] = 'Foo'
> data['age'] = 19
> data['address'] = '10, Bar Eggs Spam'
> data['value'] = 800
>
> rec_writer = io.DatumWriter(SCHEMA)
>
> df_writer = datafile.DataFileWriter(
> open(OUTFILE_NAME, 'ab'),
> rec_writer,
> writers_schema = SCHEMA,
> codec = 'deflate'
> )
>
> df_writer.append(data)
>
> df_writer.close()
>
> def read_avro_file():
> rec_reader = io.DatumReader()
>
> df_reader = datafile.DataFileReader(
> open(OUTFILE_NAME, "rb"),
> rec_reader
> )
>
> for record in df_reader:
> print record['name'], record['age']
> print record['address'], record['value']
>
>
> if __name__ == '__main__':
> # Write an AVRO file first
> write_avro_file()
> write_avro_file()
>
> # Now, read it
> read_avro_file()
>
>
> The result looks like this:
>
> Foo 19
> 10, Bar Eggs Spam 800
> Traceback (most recent call last):
> File "/Users/felixgao/Desktop/workspace/Python/avro/avroExample1.py",
> line 124, in <module>
> read_avro_file()
> File "/Users/felixgao/Desktop/workspace/Python/avro/avroExample1.py",
> line 112, in read_avro_file
> for record in df_reader:
> File
> "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/datafile.py",
> line 318, in next
> datum = self.datum_reader.read(self.datum_decoder)
> File
> "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
> line 411, in read
> return self.read_data(self.writers_schema, self.readers_schema,
> decoder)
> File
> "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
> line 456, in read_data
> return self.read_record(writers_schema, readers_schema, decoder)
> File
> "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
> line 648, in read_record
> field_val = self.read_data(field.type, readers_field.type, decoder)
> File
> "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
> line 434, in read_data
> return decoder.read_utf8()
> File
> "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
> line 210, in read_utf8
> return unicode(self.read_bytes(), "utf-8")
> UnicodeDecodeError: 'utf8' codec can't decode bytes in position 14-15:
> invalid data
>
>
>
> If I remove the second write_avro_file() call, then everything is fine. How
> do I properly append more data to the file?
>
> Thanks,
>
> Felix
>