From 858abe478fd0d5efad7d914273a046fd531e08ce Mon Sep 17 00:00:00 2001
From: Sungju Jin
Date: Wed, 30 Jan 2019 19:48:30 -0800
Subject: [PATCH] Support decimal type properly

Redshift currently handles Decimal as a string when loading data from Avro
format, since Redshift used an older version of Avro. Avro has supported the
Decimal type natively since 1.7.7
- https://issues.apache.org/jira/browse/AVRO-1402
---
 .../scala/com/databricks/spark/redshift/RedshiftWriter.scala | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/main/scala/com/databricks/spark/redshift/RedshiftWriter.scala b/src/main/scala/com/databricks/spark/redshift/RedshiftWriter.scala
index 8383231d..b3385f00 100644
--- a/src/main/scala/com/databricks/spark/redshift/RedshiftWriter.scala
+++ b/src/main/scala/com/databricks/spark/redshift/RedshiftWriter.scala
@@ -223,6 +223,7 @@ private[redshift] class RedshiftWriter(
     // However, each task gets its own deserialized copy, making this safe.
     val conversionFunctions: Array[Any => Any] = data.schema.fields.map { field =>
       field.dataType match {
+        case _: DecimalType => (v: Any) => if (v == null) null else v.toString
         case DateType =>
           val dateFormat = Conversions.createRedshiftDateFormat()
           (v: Any) => {
@@ -271,6 +272,8 @@ private[redshift] class RedshiftWriter(
     // strings. This is necessary for Redshift to be able to load these columns (see #39).
     val convertedSchema: StructType = StructType(
       schemaWithLowercaseColumnNames.map {
+        case StructField(name, _: DecimalType, nullable, meta) =>
+          StructField(name, StringType, nullable, meta)
         case StructField(name, DateType, nullable, meta) =>
           StructField(name, StringType, nullable, meta)
         case StructField(name, TimestampType, nullable, meta) =>