diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
index 52c0315bd0732..97c8f059bcdec 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -1155,6 +1155,13 @@
     ],
     "sqlState" : "42623"
   },
+  "DESCRIBE_JSON_NOT_EXTENDED" : {
+    "message" : [
+      "DESCRIBE TABLE ... AS JSON is only supported when [EXTENDED|FORMATTED] is specified.",
+      "For example: DESCRIBE EXTENDED <tableName> AS JSON is supported but DESCRIBE <tableName> AS JSON is not."
+    ],
+    "sqlState" : "0A000"
+  },
   "DISTINCT_WINDOW_FUNCTION_UNSUPPORTED" : {
     "message" : [
       "Distinct window functions are not supported: <windowExpressions>."
     ],
     "sqlState" : "42623"
   },
@@ -5283,6 +5290,11 @@
         "Attach a comment to the namespace <namespace>."
       ]
     },
+    "DESC_TABLE_COLUMN_JSON" : {
+      "message" : [
+        "DESC TABLE COLUMN AS JSON is not supported for individual columns."
+      ]
+    },
     "DESC_TABLE_COLUMN_PARTITION" : {
       "message" : [
         "DESC TABLE COLUMN for a specific partition."
       ]
     },
diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index 50cdcd6d09790..3b1138b9ee0e5 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -568,6 +568,7 @@ Below is a list of all the keywords in Spark SQL.
 |ITEMS|non-reserved|non-reserved|non-reserved|
 |ITERATE|non-reserved|non-reserved|non-reserved|
 |JOIN|reserved|strict-non-reserved|reserved|
+|JSON|non-reserved|non-reserved|non-reserved|
 |KEYS|non-reserved|non-reserved|non-reserved|
 |LANGUAGE|non-reserved|non-reserved|reserved|
 |LAST|non-reserved|non-reserved|non-reserved|
diff --git a/docs/sql-ref-syntax-aux-describe-table.md b/docs/sql-ref-syntax-aux-describe-table.md
index 4b6e1e8c3461e..6a14da1e43801 100644
--- a/docs/sql-ref-syntax-aux-describe-table.md
+++ b/docs/sql-ref-syntax-aux-describe-table.md
@@ -29,16 +29,17 @@ to return the metadata pertaining to a partition or column respectively.
 ### Syntax
 
 ```sql
-{ DESC | DESCRIBE } [ TABLE ] [ format ] table_identifier [ partition_spec ] [ col_name ]
+{ DESC | DESCRIBE } [ TABLE ] [ format ] table_identifier [ partition_spec ] [ col_name ] [ AS JSON ]
 ```
 
 ### Parameters
 
 * **format**
 
-    Specifies the optional format of describe output. If `EXTENDED` is specified
+    Specifies the optional format of describe output. If `EXTENDED` or `FORMATTED` is specified
     then additional metadata information (such as parent database, owner, and access time)
-    is returned.
+    is returned. When `EXTENDED` or `FORMATTED` is specified, the metadata can also be returned
+    in JSON format by appending `AS JSON` to the end of the statement.
 
 * **table_identifier**
 
@@ -60,8 +61,96 @@ to return the metadata pertaining to a partition or column respectively.
     and `col_name` are mutually exclusive and can not be specified together. Currently
     nested columns are not allowed to be specified.
 
+    JSON format is not currently supported for individual columns.
+
     **Syntax:** `[ database_name. ] [ table_name. ] column_name`
 
+* **AS JSON**
+
+    An optional parameter to return the table metadata in JSON format. It is only supported when
+    `EXTENDED` or `FORMATTED` is specified (both produce equivalent JSON).
+
+    **Syntax:** `[ AS JSON ]`
+
+    **Schema:**
+
+    Below is the full JSON schema. In actual output, null fields are omitted and the JSON is not
+    pretty-printed (see Examples).
+
+    ```sql
+    {
+      "table_name": "",
+      "catalog_name": "",
+      "schema_name": "",
+      "namespace": [""],
+      "type": "",
+      "provider": "",
+      "columns": [
+        {
+          "name": "",
+          "type": <type_json>,
+          "comment": "",
+          "nullable": <boolean>,
+          "default": ""
+        }
+      ],
+      "partition_values": {
+        "": ""
+      },
+      "location": "",
+      "view_text": "",
+      "view_original_text": "",
+      "view_schema_mode": "",
+      "view_catalog_and_namespace": "",
+      "view_query_output_columns": ["col1", "col2"],
+      "comment": "",
+      "table_properties": {
+        "property1": "",
+        "property2": ""
+      },
+      "storage_properties": {
+        "property1": "",
+        "property2": ""
+      },
+      "serde_library": "",
+      "input_format": "",
+      "output_format": "",
+      "num_buckets": <num_buckets>,
+      "bucket_columns": [""],
+      "sort_columns": [""],
+      "created_time": "",
+      "created_by": "",
+      "last_access": "",
+      "partition_provider": ""
+    }
+    ```
+
+    Below are the schema definitions for `<type_json>`:
+
+| Spark SQL Data Types | JSON Representation |
+|-----------------------|---------------------|
+| ByteType | `{ "name" : "tinyint" }` |
+| ShortType | `{ "name" : "smallint" }` |
+| IntegerType | `{ "name" : "int" }` |
+| LongType | `{ "name" : "bigint" }` |
+| FloatType | `{ "name" : "float" }` |
+| DoubleType | `{ "name" : "double" }` |
+| DecimalType | `{ "name" : "decimal", "precision": p, "scale": s }` |
+| StringType | `{ "name" : "string" }` |
+| VarCharType | `{ "name" : "varchar", "length": n }` |
+| CharType | `{ "name" : "char", "length": n }` |
+| BinaryType | `{ "name" : "binary" }` |
+| BooleanType | `{ "name" : "boolean" }` |
+| DateType | `{ "name" : "date" }` |
+| VariantType | `{ "name" : "variant" }` |
+| TimestampType | `{ "name" : "timestamp_ltz" }` |
+| TimestampNTZType | `{ "name" : "timestamp_ntz" }` |
+| YearMonthIntervalType | `{ "name" : "interval", "start_unit": "", "end_unit": "" }` |
+| DayTimeIntervalType | `{ "name" : "interval", "start_unit": "", "end_unit": "" }` |
+| ArrayType | `{ "name" : "array", "element_type": <type_json>, "element_nullable": <boolean> }` |
+| MapType | `{ "name" : "map", "key_type": <type_json>, "value_type": <type_json>, "value_nullable": <boolean> }` |
+| StructType | `{ "name" : "struct", "fields": [ {"name" : "field1", "type" : <type_json>, "nullable": <boolean>, "comment": "", "default": ""}, ... ] }` |
+
 ### Examples
 
 ```sql
@@ -173,6 +262,10 @@ DESCRIBE customer salesdb.customer.name;
 |data_type| string|
 | comment|Short name|
 +---------+----------+
+
+-- Returns the table metadata in JSON format.
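+-- Both EXTENDED and FORMATTED produce the same JSON output.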
+DESC FORMATTED customer AS JSON; +{"table_name":"customer","catalog_name":"spark_catalog","schema_name":"default","namespace":["default"],"columns":[{"name":"cust_id","type":{"name":"integer"},"nullable":true},{"name":"name","type":{"name":"string"},"comment":"Short name","nullable":true},{"name":"state","type":{"name":"varchar","length":20},"nullable":true}],"location": "file:/tmp/salesdb.db/custom...","created_time":"2020-04-07T14:05:43Z","last_access":"UNKNOWN","created_by":"None","type":"MANAGED","provider":"parquet","partition_provider":"Catalog","partition_columns":["state"]} ``` ### Related Statements diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index 91a267364216c..dafeed48aef11 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -283,6 +283,7 @@ IS: 'IS'; ITEMS: 'ITEMS'; ITERATE: 'ITERATE'; JOIN: 'JOIN'; +JSON: 'JSON'; KEYS: 'KEYS'; LANGUAGE: 'LANGUAGE'; LAST: 'LAST'; diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 3ca120da98dd4..667d200268cf8 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -287,7 +287,7 @@ statement | (DESC | DESCRIBE) namespace EXTENDED? identifierReference #describeNamespace | (DESC | DESCRIBE) TABLE? option=(EXTENDED | FORMATTED)? - identifierReference partitionSpec? describeColName? #describeRelation + identifierReference partitionSpec? describeColName? (AS JSON)? #describeRelation | (DESC | DESCRIBE) QUERY? 
query #describeQuery | COMMENT ON namespace identifierReference IS comment #commentNamespace @@ -1680,6 +1680,7 @@ ansiNonReserved | INVOKER | ITEMS | ITERATE + | JSON | KEYS | LANGUAGE | LAST @@ -2039,6 +2040,7 @@ nonReserved | IS | ITEMS | ITERATE + | JSON | KEYS | LANGUAGE | LAST diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/CompilationErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/CompilationErrors.scala index 3e63b8281f739..617cab4b2a39b 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/CompilationErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/CompilationErrors.scala @@ -41,6 +41,18 @@ private[sql] trait CompilationErrors extends DataTypeErrorsBase { cause = Option(cause)) } + def describeJsonNotExtendedError(tableName: String): AnalysisException = { + new AnalysisException( + errorClass = "DESCRIBE_JSON_NOT_EXTENDED", + messageParameters = Map("tableName" -> tableName)) + } + + def describeColJsonUnsupportedError(): AnalysisException = { + new AnalysisException( + errorClass = "UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_JSON", + messageParameters = Map.empty) + } + def cannotFindDescriptorFileError(filePath: String, cause: Throwable): AnalysisException = { new AnalysisException( errorClass = "PROTOBUF_DESCRIPTOR_FILE_NOT_FOUND", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index e199452a2da67..858e2cf25b6fe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -28,7 +28,7 @@ import com.fasterxml.jackson.annotation.JsonInclude.Include import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import com.fasterxml.jackson.module.scala.{ClassTagExtensions, DefaultScalaModule} import org.apache.commons.lang3.StringUtils -import org.json4s.JsonAST.{JArray, JString} +import org.json4s.JsonAST.{JArray, JBool, JDouble, JInt, JNull, JObject, JString, JValue} import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkException @@ -51,6 +51,48 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.util.{CaseInsensitiveStringMap, SchemaUtils} import org.apache.spark.util.ArrayImplicits._ +/** + * Interface providing util to convert JValue to String representation of catalog entities. 
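+ * Implementations only need to provide `toJsonLinkedHashMap`; the default `toLinkedHashMap`
+ * reuses it, flattening each JValue into the plain-text form used by the non-JSON DESCRIBE output.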
+ */ +trait MetadataMapSupport { + def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] + + def toLinkedHashMap: mutable.LinkedHashMap[String, String] = { + jsonToString(toJsonLinkedHashMap) + } + + protected def jsonToString( + jsonMap: mutable.LinkedHashMap[String, JValue]): mutable.LinkedHashMap[String, String] = { + def removeWhitespace(str: String): String = { + str.replaceAll("\\s+$", "") + } + + val map = new mutable.LinkedHashMap[String, String]() + jsonMap.foreach { case (key, jValue) => + val stringValue = jValue match { + case JString(value) => removeWhitespace(value) + case JArray(values) => + values.map(_.values) + .map { + case str: String => quoteIdentifier(removeWhitespace(str)) + case other => removeWhitespace(other.toString) + } + .mkString("[", ", ", "]") + case JObject(fields) => + fields.map { case (k, v) => + s"$k=${removeWhitespace(v.values.toString)}" + } + .mkString("[", ", ", "]") + case JInt(value) => value.toString + case JDouble(value) => value.toString + case _ => removeWhitespace(jValue.values.toString) + } + map.put(key, stringValue) + } + map + } +} + /** * A function defined in the catalog. @@ -74,25 +116,31 @@ case class CatalogStorageFormat( outputFormat: Option[String], serde: Option[String], compressed: Boolean, - properties: Map[String, String]) { + properties: Map[String, String]) extends MetadataMapSupport { override def toString: String = { - toLinkedHashMap.map { case ((key, value)) => + toLinkedHashMap.map { case (key, value) => if (value.isEmpty) key else s"$key: $value" }.mkString("Storage(", ", ", ")") } - def toLinkedHashMap: mutable.LinkedHashMap[String, String] = { - val map = new mutable.LinkedHashMap[String, String]() - locationUri.foreach(l => map.put("Location", l.toString)) - serde.foreach(map.put("Serde Library", _)) - inputFormat.foreach(map.put("InputFormat", _)) - outputFormat.foreach(map.put("OutputFormat", _)) - if (compressed) map.put("Compressed", "") + def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = { + val map = mutable.LinkedHashMap[String, JValue]() + + locationUri.foreach(l => map += ("Location" -> JString(l.toString))) + serde.foreach(s => map += ("Serde Library" -> JString(s))) + inputFormat.foreach(format => map += ("InputFormat" -> JString(format))) + outputFormat.foreach(format => map += ("OutputFormat" -> JString(format))) + + if (compressed) map += ("Compressed" -> JBool(true)) + SQLConf.get.redactOptions(properties) match { case props if props.isEmpty => // No-op case props => - map.put("Storage Properties", props.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]")) + val storagePropsJson = JObject( + props.map { case (k, v) => k -> JString(v) }.toList + ) + map += ("Storage Properties" -> storagePropsJson) } map } @@ -120,35 +168,44 @@ case class CatalogTablePartition( parameters: Map[String, String] = Map.empty, createTime: Long = System.currentTimeMillis, lastAccessTime: Long = -1, - stats: Option[CatalogStatistics] = None) { + stats: Option[CatalogStatistics] = None) extends MetadataMapSupport { + def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = { + val map = mutable.LinkedHashMap[String, JValue]() - def toLinkedHashMap: mutable.LinkedHashMap[String, String] = { - val map = new mutable.LinkedHashMap[String, String]() - val specString = spec.map { case (k, v) => s"$k=$v" }.mkString(", ") - map.put("Partition Values", s"[$specString]") - map ++= storage.toLinkedHashMap - if (parameters.nonEmpty) { - map.put("Partition Parameters", s"{" + - 
s"${SQLConf.get.redactOptions(parameters).map(p => p._1 + "=" + p._2).mkString(", ")}}") + val specJson = JObject(spec.map { case (k, v) => k -> JString(v) }.toList) + map += ("Partition Values" -> specJson) + + storage.toJsonLinkedHashMap.foreach { case (k, v) => + map += (k -> v) } - map.put("Created Time", new Date(createTime).toString) - val lastAccess = { - if (lastAccessTime <= 0) "UNKNOWN" else new Date(lastAccessTime).toString + + if (parameters.nonEmpty) { + val paramsJson = JObject(SQLConf.get.redactOptions(parameters).map { + case (k, v) => k -> JString(v) + }.toList) + map += ("Partition Parameters" -> paramsJson) } - map.put("Last Access", lastAccess) - stats.foreach(s => map.put("Partition Statistics", s.simpleString)) + + map += ("Created Time" -> JString(new Date(createTime).toString)) + + val lastAccess = if (lastAccessTime <= 0) JString("UNKNOWN") + else JString(new Date(lastAccessTime).toString) + map += ("Last Access" -> lastAccess) + + stats.foreach(s => map += ("Partition Statistics" -> JString(s.simpleString))) + map } override def toString: String = { - toLinkedHashMap.map { case ((key, value)) => + toLinkedHashMap.map { case (key, value) => if (value.isEmpty) key else s"$key: $value" }.mkString("CatalogPartition(\n\t", "\n\t", ")") } /** Readable string representation for the CatalogTablePartition. */ def simpleString: String = { - toLinkedHashMap.map { case ((key, value)) => + toLinkedHashMap.map { case (key, value) => if (value.isEmpty) key else s"$key: $value" }.mkString("", "\n", "") } @@ -284,7 +341,7 @@ object ClusterBySpec { case class BucketSpec( numBuckets: Int, bucketColumnNames: Seq[String], - sortColumnNames: Seq[String]) extends SQLConfHelper { + sortColumnNames: Seq[String]) extends SQLConfHelper with MetadataMapSupport { if (numBuckets <= 0 || numBuckets > conf.bucketingMaxBuckets) { throw QueryCompilationErrors.invalidBucketNumberError( @@ -301,11 +358,11 @@ case class BucketSpec( s"$numBuckets buckets, $bucketString$sortString" } - def toLinkedHashMap: mutable.LinkedHashMap[String, String] = { - mutable.LinkedHashMap[String, String]( - "Num Buckets" -> numBuckets.toString, - "Bucket Columns" -> bucketColumnNames.map(quoteIdentifier).mkString("[", ", ", "]"), - "Sort Columns" -> sortColumnNames.map(quoteIdentifier).mkString("[", ", ", "]") + def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = { + mutable.LinkedHashMap[String, JValue]( + "Num Buckets" -> JInt(numBuckets), + "Bucket Columns" -> JArray(bucketColumnNames.map(JString).toList), + "Sort Columns" -> JArray(sortColumnNames.map(JString).toList) ) } } @@ -355,7 +412,7 @@ case class CatalogTable( tracksPartitionsInCatalog: Boolean = false, schemaPreservesCase: Boolean = true, ignoredProperties: Map[String, String] = Map.empty, - viewOriginalText: Option[String] = None) { + viewOriginalText: Option[String] = None) extends MetadataMapSupport { import CatalogTable._ @@ -524,67 +581,81 @@ case class CatalogTable( locationUri, inputFormat, outputFormat, serde, compressed, properties)) } + def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = { + val filteredTableProperties = SQLConf.get + .redactOptions(properties.filter { case (k, v) => + !k.startsWith(VIEW_PREFIX) && v.nonEmpty + }) - def toLinkedHashMap: mutable.LinkedHashMap[String, String] = { - val map = new mutable.LinkedHashMap[String, String]() - val tableProperties = - SQLConf.get.redactOptions(properties.filter { case (k, _) => !k.startsWith(VIEW_PREFIX) }) - .toSeq.sortBy(_._1) - .map(p => p._1 + "=" + p._2) - val 
partitionColumns = partitionColumnNames.map(quoteIdentifier).mkString("[", ", ", "]") - val lastAccess = { - if (lastAccessTime <= 0) "UNKNOWN" else new Date(lastAccessTime).toString + val tableProperties: JValue = + if (filteredTableProperties.isEmpty) JNull + else JObject( + filteredTableProperties.toSeq.sortBy(_._1).map { case (k, v) => k -> JString(v) }: _*) + + val partitionColumns: JValue = + if (partitionColumnNames.nonEmpty) JArray(partitionColumnNames.map(JString).toList) + else JNull + + val lastAccess: JValue = + if (lastAccessTime <= 0) JString("UNKNOWN") else JString( + DateTimeUtils.microsToInstant(DateTimeUtils.millisToMicros(lastAccessTime)).toString) + + val viewQueryOutputColumns: JValue = + if (viewQueryColumnNames.nonEmpty) JArray(viewQueryColumnNames.map(JString).toList) + else JNull + + val map = mutable.LinkedHashMap[String, JValue]() + + if (identifier.catalog.isDefined) map += "Catalog" -> JString(identifier.catalog.get) + if (identifier.database.isDefined) map += "Database" -> JString(identifier.database.get) + map += "Table" -> JString(identifier.table) + if (Option(owner).exists(_.nonEmpty)) map += "Owner" -> JString(owner) + map += "Created Time" -> + JString(DateTimeUtils.microsToInstant(DateTimeUtils.millisToMicros(createTime)).toString) + if (lastAccess != JNull) map += "Last Access" -> lastAccess + map += "Created By" -> JString(s"Spark $createVersion") + map += "Type" -> JString(tableType.name) + if (provider.isDefined) map += "Provider" -> JString(provider.get) + bucketSpec.foreach { spec => + map ++= spec.toJsonLinkedHashMap.map { case (k, v) => k -> v } } - - identifier.catalog.foreach(map.put("Catalog", _)) - identifier.database.foreach(map.put("Database", _)) - map.put("Table", identifier.table) - if (owner != null && owner.nonEmpty) map.put("Owner", owner) - map.put("Created Time", new Date(createTime).toString) - map.put("Last Access", lastAccess) - map.put("Created By", "Spark " + createVersion) - map.put("Type", tableType.name) - provider.foreach(map.put("Provider", _)) - bucketSpec.foreach(map ++= _.toLinkedHashMap) - comment.foreach(map.put("Comment", _)) - collation.foreach(map.put("Collation", _)) - if (tableType == CatalogTableType.VIEW) { - viewText.foreach(map.put("View Text", _)) - viewOriginalText.foreach(map.put("View Original Text", _)) - if (SQLConf.get.viewSchemaBindingEnabled) { - map.put("View Schema Mode", viewSchemaMode.toString) - } - if (viewCatalogAndNamespace.nonEmpty) { - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - map.put("View Catalog and Namespace", viewCatalogAndNamespace.quoted) - } - if (viewQueryColumnNames.nonEmpty) { - map.put("View Query Output Columns", - viewQueryColumnNames.map(quoteIdentifier).mkString("[", ", ", "]")) - } + if (comment.isDefined) map += "Comment" -> JString(comment.get) + if (collation.isDefined) map += "Collation" -> JString(collation.get) + if (tableType == CatalogTableType.VIEW && viewText.isDefined) { + map += "View Text" -> JString(viewText.get) } - - if (tableProperties.nonEmpty) { - map.put("Table Properties", tableProperties.mkString("[", ", ", "]")) + if (tableType == CatalogTableType.VIEW && viewOriginalText.isDefined) { + map += "View Original Text" -> JString(viewOriginalText.get) } - stats.foreach(s => map.put("Statistics", s.simpleString)) - map ++= storage.toLinkedHashMap - if (tracksPartitionsInCatalog) map.put("Partition Provider", "Catalog") - if (partitionColumnNames.nonEmpty) map.put("Partition Columns", partitionColumns) - if (schema.nonEmpty) 
map.put("Schema", schema.treeString) - - map + if (SQLConf.get.viewSchemaBindingEnabled && tableType == CatalogTableType.VIEW) { + map += "View Schema Mode" -> JString(viewSchemaMode.toString) + } + if (viewCatalogAndNamespace.nonEmpty && tableType == CatalogTableType.VIEW) { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + map += "View Catalog and Namespace" -> JString(viewCatalogAndNamespace.quoted) + } + if (viewQueryOutputColumns != JNull) { + map += "View Query Output Columns" -> viewQueryOutputColumns + } + if (tableProperties != JNull) map += "Table Properties" -> tableProperties + if (stats.isDefined) map += "Statistics" -> JString(stats.get.simpleString) + map ++= storage.toJsonLinkedHashMap.map { case (k, v) => k -> v } + if (tracksPartitionsInCatalog) map += "Partition Provider" -> JString("Catalog") + if (partitionColumns != JNull) map += "Partition Columns" -> partitionColumns + if (schema.nonEmpty) map += "Schema" -> JString(schema.treeString) + + map.filterNot(_._2 == JNull) } override def toString: String = { - toLinkedHashMap.map { case ((key, value)) => + toLinkedHashMap.map { case (key, value) => if (value.isEmpty) key else s"$key: $value" }.mkString("CatalogTable(\n", "\n", ")") } /** Readable string representation for the CatalogTable. */ def simpleString: String = { - toLinkedHashMap.map { case ((key, value)) => + toLinkedHashMap.map { case (key, value) => if (value.isEmpty) key else s"$key: $value" }.mkString("", "\n", "") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 0a300cea03ffe..1f9c148303647 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -5200,10 +5200,17 @@ class AstBuilder extends DataTypeAstBuilder */ override def visitDescribeRelation(ctx: DescribeRelationContext): LogicalPlan = withOrigin(ctx) { val isExtended = ctx.EXTENDED != null || ctx.FORMATTED != null + val asJson = ctx.JSON != null + if (asJson && !isExtended) { + val tableName = ctx.identifierReference.getText.split("\\.").lastOption.getOrElse("table") + throw QueryCompilationErrors.describeJsonNotExtendedError(tableName) + } val relation = createUnresolvedTableOrView(ctx.identifierReference, "DESCRIBE TABLE") if (ctx.describeColName != null) { if (ctx.partitionSpec != null) { throw QueryParsingErrors.descColumnForPartitionUnsupportedError(ctx) + } else if (asJson) { + throw QueryCompilationErrors.describeColJsonUnsupportedError() } else { DescribeColumn( relation, @@ -5221,7 +5228,11 @@ class AstBuilder extends DataTypeAstBuilder } else { Map.empty[String, String] } - DescribeRelation(relation, partitionSpec, isExtended) + if (asJson) { + DescribeRelationJson(relation, partitionSpec, isExtended) + } else { + DescribeRelation(relation, partitionSpec, isExtended) + } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala index 99d2ea7751959..a6ec6f5736300 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala @@ -21,13 +21,19 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference import 
org.apache.spark.sql.types.{MetadataBuilder, StringType} private[sql] object DescribeCommandSchema { - def describeTableAttributes(): Seq[AttributeReference] = Seq( - AttributeReference("col_name", StringType, nullable = false, - new MetadataBuilder().putString("comment", "name of the column").build())(), - AttributeReference("data_type", StringType, nullable = false, - new MetadataBuilder().putString("comment", "data type of the column").build())(), - AttributeReference("comment", StringType, nullable = true, - new MetadataBuilder().putString("comment", "comment of the column").build())()) + def describeJsonTableAttributes(): Seq[AttributeReference] = + Seq( + AttributeReference("json_metadata", StringType, nullable = false, + new MetadataBuilder().putString("comment", "JSON metadata of the table").build())() + ) + def describeTableAttributes(): Seq[AttributeReference] = { + Seq(AttributeReference("col_name", StringType, nullable = false, + new MetadataBuilder().putString("comment", "name of the column").build())(), + AttributeReference("data_type", StringType, nullable = false, + new MetadataBuilder().putString("comment", "data type of the column").build())(), + AttributeReference("comment", StringType, nullable = true, + new MetadataBuilder().putString("comment", "comment of the column").build())()) + } def describeColumnAttributes(): Seq[AttributeReference] = Seq( AttributeReference("info_name", StringType, nullable = false, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 58c62a90225aa..b486a1fd0a72a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -691,6 +691,19 @@ object DescribeRelation { def getOutputAttrs: Seq[Attribute] = DescribeCommandSchema.describeTableAttributes() } +/** + * The logical plan of the DESCRIBE relation_name AS JSON command. + */ +case class DescribeRelationJson( + relation: LogicalPlan, + partitionSpec: TablePartitionSpec, + isExtended: Boolean) extends UnaryCommand { + override val output: Seq[Attribute] = DescribeCommandSchema.describeJsonTableAttributes() + override def child: LogicalPlan = relation + override protected def withNewChildInternal(newChild: LogicalPlan): DescribeRelationJson = + copy(relation = newChild) +} + /** * The logical plan of the DESCRIBE relation_name col_name command. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 65ae8da3c4da1..ac419fd150ae3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1615,6 +1615,10 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat notSupportedForV2TablesError("ALTER TABLE ... 
SET [SERDE|SERDEPROPERTIES]") } + def describeAsJsonNotSupportedForV2TablesError(): Throwable = { + notSupportedForV2TablesError("DESCRIBE TABLE AS JSON") + } + def loadDataNotSupportedForV2TablesError(): Throwable = { notSupportedForV2TablesError("LOAD DATA") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 6a388a7849f75..3b58518b98da9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -152,13 +152,17 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) case RenameTable(ResolvedV1TableOrViewIdentifier(oldIdent), newName, isView) => AlterTableRenameCommand(oldIdent, newName.asTableIdentifier, isView) + case DescribeRelationJson( + ResolvedV1TableOrViewIdentifier(ident), partitionSpec, isExtended) => + DescribeTableJsonCommand(ident, partitionSpec, isExtended) + // Use v1 command to describe (temp) view, as v2 catalog doesn't support view yet. case DescribeRelation( - ResolvedV1TableOrViewIdentifier(ident), partitionSpec, isExtended, output) => + ResolvedV1TableOrViewIdentifier(ident), partitionSpec, isExtended, output) => DescribeTableCommand(ident, partitionSpec, isExtended, output) case DescribeColumn( - ResolvedViewIdentifier(ident), column: UnresolvedAttribute, isExtended, output) => + ResolvedViewIdentifier(ident), column: UnresolvedAttribute, isExtended, output) => // For views, the column will not be resolved by `ResolveReferences` because // `ResolvedView` stores only the identifier. DescribeColumnCommand(ident, column.nameParts, isExtended, output) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index a58e8fac6e36d..e69e05ba7decd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -19,12 +19,16 @@ package org.apache.spark.sql.execution.command import java.net.{URI, URISyntaxException} +import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.jdk.CollectionConverters._ import scala.util.control.NonFatal import org.apache.hadoop.fs.{FileContext, FsConstants, Path} import org.apache.hadoop.fs.permission.{AclEntry, AclEntryScope, AclEntryType, FsAction, FsPermission} +import org.json4s._ +import org.json4s.JsonAST.JObject +import org.json4s.jackson.JsonMethods._ import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.{SQLConfHelper, TableIdentifier} @@ -746,6 +750,261 @@ case class DescribeTableCommand( } } +/** + * Command that looks like + * {{{ + * DESCRIBE [EXTENDED|FORMATTED] table_name partitionSpec? 
[AS JSON]; + * }}} + */ +case class DescribeTableJsonCommand( + table: TableIdentifier, + partitionSpec: TablePartitionSpec, + isExtended: Boolean) extends LeafRunnableCommand { + override val output = DescribeCommandSchema.describeJsonTableAttributes() + // Already added to jsonMap in DescribeTableJsonCommand + private val excludedKeys = Set("catalog", "schema", "database", "table") + + override def run(sparkSession: SparkSession): Seq[Row] = { + val jsonMap = mutable.LinkedHashMap[String, JValue]() + val catalog = sparkSession.sessionState.catalog + + if (catalog.isTempView(table)) { + if (partitionSpec.nonEmpty) { + throw QueryCompilationErrors.descPartitionNotAllowedOnTempView(table.identifier) + } + val schema = catalog.getTempViewOrPermanentTableMetadata(table).schema + describeColsJson(schema, jsonMap, header = false) + } else { + val metadata = catalog.getTableRawMetadata(table) + val schema = if (metadata.schema.isEmpty) { + // In older versions of Spark, + // the table schema can be empty and should be inferred at runtime. + sparkSession.table(metadata.identifier).schema + } else { + metadata.schema + } + + addKeyValueToMap("table_name", JString(metadata.identifier.table), jsonMap) + table.catalog.foreach(catalog => addKeyValueToMap("catalog_name", JString(catalog), jsonMap)) + table.database.foreach { db => + addKeyValueToMap("namespace", JArray(List(JString(db))), jsonMap) + addKeyValueToMap("schema_name", JString(db), jsonMap) + } + + describeColsJson(schema, jsonMap, header = false) + describeClusteringInfoJson(metadata, jsonMap) + + if (partitionSpec.nonEmpty) { + // Outputs the partition-specific info for the DDL command: + // "DESCRIBE [EXTENDED|FORMATTED] table_name PARTITION (partitionVal*)" + describePartitionInfoJson(sparkSession, catalog, metadata, jsonMap) + } else { + describeFormattedTableInfoJson(metadata, jsonMap) + } + } + + Seq(Row(compact(render(JObject(jsonMap.toList))))) + } + + private def addKeyValueToMap( + key: String, + value: JValue, + jsonMap: mutable.LinkedHashMap[String, JValue]): Unit = { + // Rename some JSON keys that are pre-named in describe table implementation + val renames = Map( + "inputformat" -> "input_format", + "outputformat" -> "output_format" + ) + + val normalizedKey = key.toLowerCase().replace(" ", "_") + val renamedKey = renames.getOrElse(normalizedKey, normalizedKey) + + if (!jsonMap.contains(renamedKey) && !excludedKeys.contains(renamedKey)) { + jsonMap += renamedKey -> value + } + } + + /** + * Util to recursively form JSON string representation of data type, used for DESCRIBE AS JSON. + * Differs from `json` in DataType.scala by providing additional fields for some types. 
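+   * For example, `DecimalType(10, 2)` is rendered here as
+   * `{"name": "decimal", "precision": 10, "scale": 2}`, whereas `DataType.json` would emit
+   * a single string such as `"decimal(10,2)"`.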
+ */ + private def jsonType( + dataType: DataType): JValue = { + dataType match { + case arrayType: ArrayType => + JObject( + "name" -> JString("array"), + "element_type" -> jsonType(arrayType.elementType), + "element_nullable" -> JBool(arrayType.containsNull) + ) + + case mapType: MapType => + JObject( + "name" -> JString("map"), + "key_type" -> jsonType(mapType.keyType), + "value_type" -> jsonType(mapType.valueType), + "value_nullable" -> JBool(mapType.valueContainsNull) + ) + + case structType: StructType => + val fieldsJson = structType.fields.map { field => + val baseJson = List( + "name" -> JString(field.name), + "type" -> jsonType(field.dataType), + "nullable" -> JBool(field.nullable) + ) + val commentJson = field.getComment().map(comment => "comment" -> JString(comment)).toList + val defaultJson = + field.getCurrentDefaultValue().map(default => "default" -> JString(default)).toList + + JObject(baseJson ++ commentJson ++ defaultJson: _*) + }.toList + + JObject( + "name" -> JString("struct"), + "fields" -> JArray(fieldsJson) + ) + + case decimalType: DecimalType => + JObject( + "name" -> JString("decimal"), + "precision" -> JInt(decimalType.precision), + "scale" -> JInt(decimalType.scale) + ) + + case varcharType: VarcharType => + JObject( + "name" -> JString("varchar"), + "length" -> JInt(varcharType.length) + ) + + case charType: CharType => + JObject( + "name" -> JString("char"), + "length" -> JInt(charType.length) + ) + + // Only override TimestampType; TimestampType_NTZ type is already timestamp_ntz + case _: TimestampType => + JObject("name" -> JString("timestamp_ltz")) + + case yearMonthIntervalType: YearMonthIntervalType => + def getFieldName(field: Byte): String = YearMonthIntervalType.fieldToString(field) + + JObject( + "name" -> JString("interval"), + "start_unit" -> JString(getFieldName(yearMonthIntervalType.startField)), + "end_unit" -> JString(getFieldName(yearMonthIntervalType.endField)) + ) + + case dayTimeIntervalType: DayTimeIntervalType => + def getFieldName(field: Byte): String = DayTimeIntervalType.fieldToString(field) + + JObject( + "name" -> JString("interval"), + "start_unit" -> JString(getFieldName(dayTimeIntervalType.startField)), + "end_unit" -> JString(getFieldName(dayTimeIntervalType.endField)) + ) + + case _ => + JObject("name" -> JString(dataType.typeName)) + } + } + + private def describeColsJson( + schema: StructType, + jsonMap: mutable.LinkedHashMap[String, JValue], + header: Boolean): Unit = { + val columnsJson = jsonType(StructType(schema.fields)) + .asInstanceOf[JObject].find(_.isInstanceOf[JArray]).get + addKeyValueToMap("columns", columnsJson, jsonMap) + } + + private def describeClusteringInfoJson( + table: CatalogTable, jsonMap: mutable.LinkedHashMap[String, JValue]): Unit = { + table.clusterBySpec.foreach { clusterBySpec => + val clusteringColumnsJson: JValue = JArray( + clusterBySpec.columnNames.map { fieldNames => + val nestedFieldOpt = table.schema.findNestedField(fieldNames.fieldNames.toIndexedSeq) + assert(nestedFieldOpt.isDefined, + "The clustering column " + + s"${fieldNames.fieldNames.map(quoteIfNeeded).mkString(".")} " + + s"was not found in the table schema ${table.schema.catalogString}." 
+ ) + val (path, field) = nestedFieldOpt.get + JObject( + "name" -> JString((path :+ field.name).map(quoteIfNeeded).mkString(".")), + "type" -> jsonType(field.dataType), + "comment" -> field.getComment().map(JString).getOrElse(JNull) + ) + }.toList + ) + addKeyValueToMap("clustering_information", clusteringColumnsJson, jsonMap) + } + } + + private def describeFormattedTableInfoJson( + table: CatalogTable, jsonMap: mutable.LinkedHashMap[String, JValue]): Unit = { + table.bucketSpec match { + case Some(spec) => + spec.toJsonLinkedHashMap.foreach { case (key, value) => + addKeyValueToMap(key, value, jsonMap) + } + case _ => + } + table.storage.toJsonLinkedHashMap.foreach { case (key, value) => + addKeyValueToMap(key, value, jsonMap) + } + + val filteredTableInfo = table.toJsonLinkedHashMap + + filteredTableInfo.map { case (key, value) => + addKeyValueToMap(key, value, jsonMap) + } + } + + private def describePartitionInfoJson( + spark: SparkSession, + catalog: SessionCatalog, + metadata: CatalogTable, + jsonMap: mutable.LinkedHashMap[String, JValue]): Unit = { + if (metadata.tableType == CatalogTableType.VIEW) { + throw QueryCompilationErrors.descPartitionNotAllowedOnView(table.identifier) + } + + DDLUtils.verifyPartitionProviderIsHive(spark, metadata, "DESC PARTITION") + val normalizedPartSpec = PartitioningUtils.normalizePartitionSpec( + partitionSpec, + metadata.partitionSchema, + table.quotedString, + spark.sessionState.conf.resolver) + val partition = catalog.getPartition(table, normalizedPartSpec) + + // First add partition details to jsonMap. + // `addKeyValueToMap` only adds unique keys, so this ensures the + // more detailed partition information is added + // in the case of duplicated key names (e.g. storage_information). + partition.toJsonLinkedHashMap.map { case (key, value) => + addKeyValueToMap(key, value, jsonMap) + } + + metadata.toJsonLinkedHashMap.map { case (key, value) => + addKeyValueToMap(key, value, jsonMap) + } + + metadata.bucketSpec match { + case Some(spec) => + spec.toJsonLinkedHashMap.map { case (key, value) => + addKeyValueToMap(key, value, jsonMap) + } + case _ => + } + metadata.storage.toJsonLinkedHashMap.map { case (key, value) => + addKeyValueToMap(key, value, jsonMap) + } + } +} + /** * Command that looks like * {{{ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 499721fbae4e8..f7a3be9254758 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -344,6 +344,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case DescribeNamespace(ResolvedNamespace(catalog, ns, _), extended, output) => DescribeNamespaceExec(output, catalog.asNamespaceCatalog, ns, extended) :: Nil + case DescribeRelationJson(_, _, _) => + throw QueryCompilationErrors.describeAsJsonNotSupportedForV2TablesError() + case DescribeRelation(r: ResolvedTable, partitionSpec, isExtended, output) => if (partitionSpec.nonEmpty) { throw QueryCompilationErrors.describeDoesNotSupportPartitionForV2TablesError() diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out index ff0935bfd03ec..f52f69a5ff808 100644 --- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out @@ -56,6 +56,35 @@ DESCRIBE t DescribeTableCommand `spark_catalog`.`default`.`t`, false, [col_name#x, data_type#x, comment#x] +-- !query +DESCRIBE EXTENDED t AS JSON +-- !query analysis +DescribeTableJsonCommand `spark_catalog`.`default`.`t`, true + + +-- !query +DESCRIBE t AS JSON +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "DESCRIBE_JSON_NOT_EXTENDED", + "sqlState" : "0A000", + "messageParameters" : { + "tableName" : "t" + } +} + + +-- !query +DESC FORMATTED t a AS JSON +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_JSON", + "sqlState" : "0A000" +} + + -- !query DESC default.t -- !query analysis @@ -110,6 +139,12 @@ DESC t PARTITION (c='Us', d=1) DescribeTableCommand `spark_catalog`.`default`.`t`, [c=Us, d=1], false, [col_name#x, data_type#x, comment#x] +-- !query +DESC EXTENDED t PARTITION (c='Us', d=1) AS JSON +-- !query analysis +DescribeTableJsonCommand `spark_catalog`.`default`.`t`, [c=Us, d=1], true + + -- !query DESC EXTENDED t PARTITION (c='Us', d=1) -- !query analysis @@ -290,6 +325,12 @@ EXPLAIN DESCRIBE t PARTITION (c='Us', d=2) ExplainCommand 'DescribeRelation [c=Us, d=2], false, [col_name#x, data_type#x, comment#x], SimpleMode +-- !query +EXPLAIN DESCRIBE EXTENDED t PARTITION (c='Us', d=2) AS JSON +-- !query analysis +ExplainCommand 'DescribeRelationJson [c=Us, d=2], true, SimpleMode + + -- !query DROP TABLE t -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe.sql b/sql/core/src/test/resources/sql-tests/inputs/describe.sql index b37931456d00c..aa6f38defdecc 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/describe.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/describe.sql @@ -21,6 +21,14 @@ ALTER TABLE t ADD PARTITION (c='Us', d=1); DESCRIBE t; +DESCRIBE EXTENDED t AS JSON; + +-- AnalysisException: describe table as json must be extended +DESCRIBE t AS JSON; + +-- AnalysisException: describe col as json unsupported +DESC FORMATTED t a AS JSON; + DESC default.t; DESC TABLE t; @@ -39,6 +47,8 @@ DESC EXTENDED t; DESC t PARTITION (c='Us', d=1); +DESC EXTENDED t PARTITION (c='Us', d=1) AS JSON; + DESC EXTENDED t PARTITION (c='Us', d=1); DESC FORMATTED t PARTITION (c='Us', d=1); @@ -88,6 +98,7 @@ EXPLAIN DESC EXTENDED t; EXPLAIN EXTENDED DESC t; EXPLAIN DESCRIBE t b; EXPLAIN DESCRIBE t PARTITION (c='Us', d=2); +EXPLAIN DESCRIBE EXTENDED t PARTITION (c='Us', d=2) AS JSON; -- DROP TEST TABLES/VIEWS DROP TABLE t; @@ -119,3 +130,4 @@ DESC EXTENDED e; DESC TABLE EXTENDED e; DESC FORMATTED e; + diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index 0f51816c145e8..015b0ceff335e 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -71,6 +71,41 @@ c string d string +-- !query +DESCRIBE EXTENDED t AS JSON +-- !query schema +struct +-- !query output 
+{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string"},"nullable":true},{"name":"b","type":{"name":"integer"},"nullable":true},{"name":"c","type":{"name":"string"},"nullable":true},{"name":"d","type":{"name":"string"},"nullable":true}],"num_buckets":2,"bucket_columns":["a"],"sort_columns":["b"],"location":"file:[not included in comparison]/{warehouse_dir}/t","storage_properties":{"a":"1","b":"2","password":"*********(redacted)"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"parquet","comment":"table_comment","table_properties":{"e":"3","password":"*********(redacted)","t":"test"},"partition_provider":"Catalog","partition_columns":["c","d"]} + + +-- !query +DESCRIBE t AS JSON +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "DESCRIBE_JSON_NOT_EXTENDED", + "sqlState" : "0A000", + "messageParameters" : { + "tableName" : "t" + } +} + + +-- !query +DESC FORMATTED t a AS JSON +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_JSON", + "sqlState" : "0A000" +} + + -- !query DESC default.t -- !query schema @@ -263,6 +298,14 @@ c string d string +-- !query +DESC EXTENDED t PARTITION (c='Us', d=1) AS JSON +-- !query schema +struct +-- !query output +{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string"},"nullable":true},{"name":"b","type":{"name":"integer"},"nullable":true},{"name":"c","type":{"name":"string"},"nullable":true},{"name":"d","type":{"name":"string"},"nullable":true}],"partition_values":{"c":"Us","d":"1"},"location":"file:[not included in comparison]/{warehouse_dir}/t/c=Us/d=1","storage_properties":{"a":"1","b":"2","password":"*********(redacted)"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"parquet","num_buckets":2,"bucket_columns":["a"],"sort_columns":["b"],"table_properties":{"password":"*********(redacted)","t":"test"},"partition_provider":"Catalog","partition_columns":["c","d"]} + + -- !query DESC EXTENDED t PARTITION (c='Us', d=1) -- !query schema @@ -644,6 +687,16 @@ Execute DescribeTableCommand +- DescribeTableCommand `spark_catalog`.`default`.`t`, [c=Us, d=2], false, [col_name#x, data_type#x, comment#x] +-- !query +EXPLAIN DESCRIBE EXTENDED t PARTITION (c='Us', d=2) AS JSON +-- !query schema +struct +-- !query output +== Physical Plan == +Execute DescribeTableJsonCommand + +- DescribeTableJsonCommand `spark_catalog`.`default`.`t`, [c=Us, d=2], true + + -- !query DROP TABLE t -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out b/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out index f9c9df3f9bf5f..521b0afe19264 100644 --- a/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out @@ -170,6 +170,7 @@ IS true ITEMS false ITERATE false JOIN true +JSON false KEYS false LANGUAGE false LAST false diff --git a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out 
b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out index 67e5e4170d789..4d702588ad2b3 100644 --- a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out @@ -170,6 +170,7 @@ IS false ITEMS false ITERATE false JOIN false +JSON false KEYS false LANGUAGE false LAST false diff --git a/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out b/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out index 67e5e4170d789..4d702588ad2b3 100644 --- a/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out @@ -170,6 +170,7 @@ IS false ITEMS false ITERATE false JOIN false +JSON false KEYS false LANGUAGE false LAST false diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out index cc32e2eff2551..a4b967ca61f08 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out @@ -128,7 +128,6 @@ View Schema Mode: BINDING Schema: root |-- e: integer (nullable = true) - showdb show_t1 false Catalog: spark_catalog Database: showdb Table: show_t1 @@ -146,7 +145,6 @@ Schema: root |-- c: string (nullable = true) |-- d: string (nullable = true) - showdb show_t2 false Catalog: spark_catalog Database: showdb Table: show_t2 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index 7daf2c6b1b58b..04f274e4af592 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -60,7 +60,18 @@ trait SQLQueryTestHelper extends Logging { .replaceAll("CTERelationDef \\d+,", s"CTERelationDef xxxx,") .replaceAll("CTERelationRef \\d+,", s"CTERelationRef xxxx,") .replaceAll("@\\w*,", s"@xxxxxxxx,") - .replaceAll("\\*\\(\\d+\\) ", "*") // remove the WholeStageCodegen codegenStageIds + .replaceAll("\\*\\(\\d+\\) ", "*") + .replaceAll( + s""""location":.*?$clsName/""", + s""""location": "$notIncludedMsg/{warehouse_dir}/""") + .replaceAll(s""""created_by":".*?"""", s""""created_by $notIncludedMsg":"None"""") + .replaceAll(s""""created_time":".*?"""", s""""created_time $notIncludedMsg":"None"""") + .replaceAll(s""""last_access":".*?"""", s""""last_access $notIncludedMsg":"None"""") + .replaceAll(s""""owner":".*?"""", s""""owner $notIncludedMsg":"None"""") + .replaceAll(s""""partition_statistics":"\\d+"""", + s""""partition_statistics $notIncludedMsg":"None"""") + .replaceAll("cterelationdef \\d+,", "cterelationdef xxxx,") + .replaceAll("cterelationref \\d+,", "cterelationref xxxx,") } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala index 944f20bf8e924..d81f007e2a4d3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution.command +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAttribute, UnresolvedTableOrView} import 
org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan import org.apache.spark.sql.catalyst.plans.logical.{DescribeColumn, DescribeRelation} @@ -75,6 +76,12 @@ class DescribeTableParserSuite extends AnalysisTest { UnresolvedAttribute(Seq("col")), isExtended = true)) + val error = intercept[AnalysisException](parsePlan("DESCRIBE EXTENDED t col AS JSON")) + + checkError( + exception = error, + condition = "UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_JSON") + val sql = "DESCRIBE TABLE t PARTITION (ds='1970-01-01') col" checkError( exception = parseException(parsePlan)(sql), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 92467cbcb6c05..541fec1cb3740 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Cast, EqualTo, Expression, InSubquery, IntegerLiteral, ListQuery, Literal, StringLiteral} import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} -import org.apache.spark.sql.catalyst.plans.logical.{AlterColumn, AnalysisOnlyCommand, AppendData, Assignment, CreateTable, CreateTableAsSelect, DeleteAction, DeleteFromTable, DescribeRelation, DropTable, InsertAction, InsertIntoStatement, LocalRelation, LogicalPlan, MergeIntoTable, OneRowRelation, OverwriteByExpression, OverwritePartitionsDynamic, Project, SetTableLocation, SetTableProperties, ShowTableProperties, SubqueryAlias, UnsetTableProperties, UpdateAction, UpdateTable} +import org.apache.spark.sql.catalyst.plans.logical.{AlterColumn, AnalysisOnlyCommand, AppendData, Assignment, CreateTable, CreateTableAsSelect, DeleteAction, DeleteFromTable, DescribeRelation, DescribeRelationJson, DropTable, InsertAction, InsertIntoStatement, LocalRelation, LogicalPlan, MergeIntoTable, OneRowRelation, OverwriteByExpression, OverwritePartitionsDynamic, Project, SetTableLocation, SetTableProperties, ShowTableProperties, SubqueryAlias, UnsetTableProperties, UpdateAction, UpdateTable} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId import org.apache.spark.sql.connector.FakeV2Provider @@ -961,6 +961,43 @@ class PlanResolutionSuite extends AnalysisTest { assert(parsed4.isInstanceOf[DescribeTableCommand]) } + test("DESCRIBE AS JSON relation") { + Seq("v1Table" -> true, "v2Table" -> false, "testcat.tab" -> false).foreach { + case (tblName, useV1Command) => + val sql = s"DESC TABLE EXTENDED $tblName AS JSON" + val parsed = parseAndResolve(sql) + if (useV1Command) { + val expected2 = DescribeTableJsonCommand( + TableIdentifier(tblName, Some("default"), Some(SESSION_CATALOG_NAME)), + Map.empty, true) + + comparePlans(parsed, expected2) + } else { + parsed match { + case DescribeRelationJson(_: ResolvedTable, _, isExtended) => + assert(isExtended) + case _ => fail("Expect DescribeTable, but got:\n" + parsed.treeString) + } + } + + val sql2 = s"DESC TABLE EXTENDED $tblName PARTITION(a=1) AS JSON" + val parsed2 = parseAndResolve(sql2) + if (useV1Command) { + val expected2 = DescribeTableJsonCommand( + TableIdentifier(tblName, Some("default"), 
Some(SESSION_CATALOG_NAME)), + Map("a" -> "1"), true) + comparePlans(parsed2, expected2) + } else { + parsed2 match { + case DescribeRelationJson(_: ResolvedTable, partitionSpec, isExtended) => + assert(isExtended) + assert(partitionSpec == Map("a" -> "1")) + case _ => fail("Expect DescribeTable, but got:\n" + parsed2.treeString) + } + } + } + } + test("DELETE FROM") { Seq("v2Table", "testcat.tab").foreach { tblName => val sql1 = s"DELETE FROM $tblName" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala index 164ac2bff9f63..4413087e886e8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala @@ -19,6 +19,9 @@ package org.apache.spark.sql.execution.command.v1 import java.util.Locale +import org.json4s._ +import org.json4s.jackson.JsonMethods.parse + import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME import org.apache.spark.sql.execution.command @@ -36,6 +39,7 @@ import org.apache.spark.sql.types.StringType */ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase with command.TestsV1AndV2Commands { + implicit val formats: org.json4s.DefaultFormats.type = org.json4s.DefaultFormats def getProvider(): String = defaultUsing.stripPrefix("USING").trim.toLowerCase(Locale.ROOT) @@ -276,4 +280,558 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase { )) } } + + test("DESCRIBE AS JSON throws when not EXTENDED") { + withNamespaceAndTable("ns", "table") { t => + val tableCreationStr = + s""" + |CREATE TABLE $t ( + | employee_id INT, + | employee_name STRING, + | department STRING, + | hire_date DATE + |) USING parquet + |OPTIONS ('compression' = 'snappy', 'max_records' = '1000') + |PARTITIONED BY (department, hire_date) + |CLUSTERED BY (employee_id) SORTED BY (employee_name ASC) INTO 4 BUCKETS + |COMMENT 'Employee data table for testing partitions and buckets' + |TBLPROPERTIES ('version' = '1.0') + |""".stripMargin + spark.sql(tableCreationStr) + + val error = intercept[AnalysisException] { + spark.sql(s"DESCRIBE $t AS JSON") + } + + checkError( + exception = error, + condition = "DESCRIBE_JSON_NOT_EXTENDED", + parameters = Map("tableName" -> "table")) + } + } + + test("DESCRIBE AS JSON partitions, clusters, buckets") { + withNamespaceAndTable("ns", "table") { t => + val tableCreationStr = + s""" + |CREATE TABLE $t ( + | employee_id INT, + | employee_name STRING, + | department STRING, + | hire_date DATE + |) USING parquet + |OPTIONS ('compression' = 'snappy', 'max_records' = '1000') + |PARTITIONED BY (department, hire_date) + |CLUSTERED BY (employee_id) SORTED BY (employee_name ASC) INTO 4 BUCKETS + |COMMENT 'Employee data table for testing partitions and buckets' + |TBLPROPERTIES ('version' = '1.0') + |""".stripMargin + spark.sql(tableCreationStr) + val descriptionDf = spark.sql(s"DESCRIBE EXTENDED $t AS JSON") + val firstRow = descriptionDf.select("json_metadata").head() + val jsonValue = firstRow.getString(0) + val parsedOutput = parse(jsonValue).extract[DescribeTableJson] + + val expectedOutput = DescribeTableJson( + table_name = Some("table"), + catalog_name = Some(SESSION_CATALOG_NAME), + namespace = Some(List("ns")), + schema_name = Some("ns"), + columns = Some(List( + 
TableColumn("employee_id", Type("integer"), true), + TableColumn("employee_name", Type("string"), true), + TableColumn("department", Type("string"), true), + TableColumn("hire_date", Type("date"), true) + )), + owner = Some(""), + created_time = Some(""), + last_access = Some("UNKNOWN"), + created_by = Some("Spark 4.0.0-SNAPSHOT"), + `type` = Some("MANAGED"), + provider = Some("parquet"), + bucket_columns = Some(List("employee_id")), + sort_columns = Some(List("employee_name")), + comment = Some("Employee data table for testing partitions and buckets"), + table_properties = Some(Map( + "version" -> "1.0" + )), + location = Some(""), + serde_library = Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"), + inputformat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"), + outputformat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"), + storage_properties = Some(Map( + "compression" -> "snappy", + "max_records" -> "1000" + )), + partition_provider = Some("Catalog"), + partition_columns = Some(List("department", "hire_date")) + ) + + if (getProvider() == "hive") { + assert(expectedOutput == parsedOutput.copy(owner = Some(""), + created_time = Some(""), + location = Some(""))) + } else { + assert(expectedOutput.copy(inputformat = None, outputformat = None, serde_library = None) + == parsedOutput.copy(owner = Some(""), created_time = Some(""), location = Some(""))) + } + } + } + + test("DESCRIBE AS JSON partition spec") { + withNamespaceAndTable("ns", "table") { t => + val tableCreationStr = + s""" + |CREATE TABLE $t ( + | id INT, + | name STRING, + | region STRING, + | category STRING + |) USING parquet + |PARTITIONED BY (region, category) + |COMMENT 'test partition spec' + |TBLPROPERTIES ('t' = 'test') + |""".stripMargin + spark.sql(tableCreationStr) + spark.sql(s"ALTER TABLE $t ADD PARTITION (region='USA', category='tech')") + + val descriptionDf = + spark.sql(s"DESCRIBE FORMATTED $t PARTITION (region='USA', category='tech') AS JSON") + val firstRow = descriptionDf.select("json_metadata").head() + val jsonValue = firstRow.getString(0) + val parsedOutput = parse(jsonValue).extract[DescribeTableJson] + + val expectedOutput = DescribeTableJson( + table_name = Some("table"), + catalog_name = Some("spark_catalog"), + namespace = Some(List("ns")), + schema_name = Some("ns"), + columns = Some(List( + TableColumn("id", Type("integer"), true), + TableColumn("name", Type("string"), true), + TableColumn("region", Type("string"), true), + TableColumn("category", Type("string"), true) + )), + last_access = Some("UNKNOWN"), + created_by = Some("Spark 4.0.0-SNAPSHOT"), + `type` = Some("MANAGED"), + provider = Some("parquet"), + bucket_columns = Some(Nil), + sort_columns = Some(Nil), + comment = Some("test partition spec"), + table_properties = Some(Map( + "t" -> "test" + )), + serde_library = Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"), + inputformat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"), + outputformat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"), + storage_properties = Some(Map( + "serialization.format" -> "1" + )), + partition_provider = Some("Catalog"), + partition_columns = Some(List("region", "category")), + partition_values = Some(Map("region" -> "USA", "category" -> "tech")) + ) + + val filteredParsedStorageProperties = + parsedOutput.storage_properties.map(_.filterNot { case (key, _) => key == "path" }) + + if (getProvider() == "hive") { + 
assert(expectedOutput == + parsedOutput.copy(location = None, created_time = None, owner = None, + storage_properties = filteredParsedStorageProperties)) + } else { + assert(expectedOutput.copy( + inputformat = None, outputformat = None, serde_library = None, storage_properties = None) + == parsedOutput.copy(location = None, created_time = None, owner = None, + storage_properties = filteredParsedStorageProperties)) + } + } + } + + test("DESCRIBE AS JSON default values") { + withNamespaceAndTable("ns", "table") { t => + val tableCreationStr = + s""" + |CREATE TABLE $t ( + | id INT DEFAULT 1, + | name STRING DEFAULT 'unknown', + | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + | is_active BOOLEAN DEFAULT true + |) + |USING parquet COMMENT 'table_comment' + |""".stripMargin + spark.sql(tableCreationStr) + + val descriptionDf = spark.sql(s"DESC EXTENDED $t AS JSON") + val firstRow = descriptionDf.select("json_metadata").head() + val jsonValue = firstRow.getString(0) + val parsedOutput = parse(jsonValue).extract[DescribeTableJson] + + val expectedOutput = DescribeTableJson( + table_name = Some("table"), + catalog_name = Some("spark_catalog"), + namespace = Some(List("ns")), + schema_name = Some("ns"), + columns = Some(List( + TableColumn("id", Type("integer"), default = Some("1")), + TableColumn("name", Type("string"), default = Some("'unknown'")), + TableColumn("created_at", Type("timestamp_ltz"), default = Some("CURRENT_TIMESTAMP")), + TableColumn("is_active", Type("boolean"), default = Some("true")) + )), + last_access = Some("UNKNOWN"), + created_by = Some("Spark 4.0.0-SNAPSHOT"), + `type` = Some("MANAGED"), + storage_properties = None, + provider = Some("parquet"), + bucket_columns = Some(Nil), + sort_columns = Some(Nil), + comment = Some("table_comment"), + serde_library = Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"), + inputformat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"), + outputformat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"), + table_properties = None + ) + if (getProvider() == "hive") { + assert( + expectedOutput == + parsedOutput.copy(location = None, created_time = None, owner = None) + ) + } else { + assert( + expectedOutput.copy(inputformat = None, outputformat = None, serde_library = None) == + parsedOutput.copy(location = None, created_time = None, owner = None) + ) + } + } + } + + test("DESCRIBE AS JSON temp view") { + withNamespaceAndTable("ns", "table") { t => + withTempView("temp_view") { + val tableCreationStr = + s""" + |CREATE TABLE $t (id INT, name STRING, created_at TIMESTAMP) + | USING parquet + | OPTIONS ('compression' 'snappy') + | CLUSTERED BY (id, name) SORTED BY (created_at) INTO 4 BUCKETS + | COMMENT 'test temp view' + | TBLPROPERTIES ('parquet.encryption' = 'true') + |""".stripMargin + spark.sql(tableCreationStr) + spark.sql(s"CREATE TEMPORARY VIEW temp_view AS SELECT * FROM $t") + val descriptionDf = spark.sql(s"DESCRIBE EXTENDED temp_view AS JSON") + val firstRow = descriptionDf.select("json_metadata").head() + val jsonValue = firstRow.getString(0) + val parsedOutput = parse(jsonValue).extract[DescribeTableJson] + + val expectedOutput = DescribeTableJson( + columns = Some(List( + TableColumn("id", Type("integer")), + TableColumn("name", Type("string")), + TableColumn("created_at", Type("timestamp_ltz")) + )) + ) + + assert(expectedOutput == parsedOutput) + } + } + } + + test("DESCRIBE AS JSON persistent view") { + withNamespaceAndTable("ns", "table") { t => + 
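+      // Back the view with a table in ns, then create a persistent view over it; the JSON
+      // output should include the view_* fields (view_text, view_schema_mode, output columns).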
withView("view") { + val tableCreationStr = + s""" + |CREATE TABLE $t (id INT, name STRING, created_at TIMESTAMP) + | USING parquet + | OPTIONS ('compression' 'snappy') + | CLUSTERED BY (id, name) SORTED BY (created_at) INTO 4 BUCKETS + | COMMENT 'test temp view' + | TBLPROPERTIES ('parquet.encryption' = 'true') + |""".stripMargin + spark.sql(tableCreationStr) + spark.sql(s"CREATE VIEW view AS SELECT * FROM $t") + val descriptionDf = spark.sql(s"DESCRIBE EXTENDED view AS JSON") + val firstRow = descriptionDf.select("json_metadata").head() + val jsonValue = firstRow.getString(0) + val parsedOutput = parse(jsonValue).extract[DescribeTableJson] + + val expectedOutput = DescribeTableJson( + table_name = Some("view"), + catalog_name = Some("spark_catalog"), + namespace = Some(List("default")), + schema_name = Some("default"), + columns = Some(List( + TableColumn("id", Type("integer")), + TableColumn("name", Type("string")), + TableColumn("created_at", Type("timestamp_ltz")) + )), + serde_library = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"), + inputformat = Some("org.apache.hadoop.mapred.SequenceFileInputFormat"), + outputformat = Some("org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat"), + storage_properties = Some(Map("serialization.format" -> "1")), + last_access = Some("UNKNOWN"), + created_by = Some("Spark 4.0.0-SNAPSHOT"), + `type` = Some("VIEW"), + view_text = Some("SELECT * FROM spark_catalog.ns.table"), + view_original_text = Some("SELECT * FROM spark_catalog.ns.table"), + view_schema_mode = Some("COMPENSATION"), + view_catalog_and_namespace = Some("spark_catalog.default"), + view_query_output_columns = Some(List("id", "name", "created_at")) + ) + + if (getProvider() == "hive") { + assert(expectedOutput == + parsedOutput.copy(table_properties = None, created_time = None, owner = None)) + } else { + assert(expectedOutput.copy(inputformat = None, + outputformat = None, serde_library = None, storage_properties = None) + == parsedOutput.copy(table_properties = None, created_time = None, owner = None)) + } + } + } + } + + test("DESCRIBE AS JSON for column throws Analysis Exception") { + withNamespaceAndTable("ns", "table") { t => + val tableCreationStr = + s""" + |CREATE TABLE ns.table( + | cust_id INT, + | state VARCHAR(20), + | name STRING COMMENT "Short name" + | ) + | USING parquet + | PARTITIONED BY (state) + |""".stripMargin + spark.sql(tableCreationStr) + spark.sql("INSERT INTO ns.table PARTITION (state = \"CA\") VALUES (100, \"Jane\")") + val error = intercept[AnalysisException] { + spark.sql("DESCRIBE FORMATTED ns.table ns.table.name AS JSON") + } + + checkError( + exception = error, + condition = "UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_JSON") + } + } + + test("DESCRIBE AS JSON complex types") { + withNamespaceAndTable("ns", "table") { t => + val tableCreationStr = + s""" + |CREATE TABLE $t ( + | id STRING, + | logs VARIANT, + | nested_struct STRUCT< + | name: STRING, + | age: INT, + | contact: STRUCT< + | email: STRING, + | phone_numbers: ARRAY, + | addresses: ARRAY> + | > + | >, + | preferences MAP> + |) USING parquet + | OPTIONS (option1 'value1', option2 'value2') + | PARTITIONED BY (id) + | COMMENT 'A table with nested complex types' + | TBLPROPERTIES ('property1' = 'value1', 'password' = 'password') + """.stripMargin + spark.sql(tableCreationStr) + val descriptionDf = spark.sql(s"DESCRIBE EXTENDED $t AS JSON") + val firstRow = descriptionDf.select("json_metadata").head() + val jsonValue = firstRow.getString(0) + val parsedOutput = 
parse(jsonValue).extract[DescribeTableJson] + + val expectedOutput = DescribeTableJson( + table_name = Some("table"), + catalog_name = Some("spark_catalog"), + namespace = Some(List("ns")), + schema_name = Some("ns"), + columns = Some(List( + TableColumn( + name = "logs", + `type` = Type("variant"), + default = None + ), + TableColumn( + name = "nested_struct", + `type` = Type( + name = "struct", + fields = Some(List( + Field( + name = "name", + `type` = Type("string") + ), + Field( + name = "age", + `type` = Type("integer") + ), + Field( + name = "contact", + `type` = Type( + name = "struct", + fields = Some(List( + Field( + name = "email", + `type` = Type("string") + ), + Field( + name = "phone_numbers", + `type` = Type( + name = "array", + element_type = Some(Type("string")), + element_nullable = Some(true) + ) + ), + Field( + name = "addresses", + `type` = Type( + name = "array", + element_type = Some(Type( + name = "struct", + fields = Some(List( + Field( + name = "street", + `type` = Type("string") + ), + Field( + name = "city", + `type` = Type("string") + ), + Field( + name = "zip", + `type` = Type("integer") + ) + )) + )), + element_nullable = Some(true) + ) + ) + )) + ) + ) + )) + ), + default = None + ), + TableColumn( + name = "preferences", + `type` = Type( + name = "map", + key_type = Some(Type("string")), + value_type = Some(Type( + name = "array", + element_type = Some(Type("string")), + element_nullable = Some(true) + )), + value_nullable = Some(true) + ), + default = None + ), + TableColumn( + name = "id", + `type` = Type("string"), + default = None + ) + )), + serde_library = Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"), + inputformat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"), + outputformat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"), + storage_properties = Some(Map( + "option1" -> "value1", + "option2" -> "value2" + )), + last_access = Some("UNKNOWN"), + created_by = Some("Spark 4.0.0-SNAPSHOT"), + `type` = Some("MANAGED"), + provider = Some("parquet"), + comment = Some("A table with nested complex types"), + table_properties = Some(Map( + "password" -> "*********(redacted)", + "property1" -> "value1" + )), + partition_provider = Some("Catalog"), + partition_columns = Some(List("id")) + ) + + if (getProvider() == "hive") { + assert(expectedOutput == + parsedOutput.copy(location = None, created_time = None, owner = None)) + } else { + assert(expectedOutput.copy(inputformat = None, outputformat = None, serde_library = None) + == parsedOutput.copy(location = None, created_time = None, owner = None)) + } + } + } } + +/** Represents JSON output of DESCRIBE TABLE AS JSON */ +case class DescribeTableJson( + table_name: Option[String] = None, + catalog_name: Option[String] = None, + namespace: Option[List[String]] = Some(Nil), + schema_name: Option[String] = None, + columns: Option[List[TableColumn]] = Some(Nil), + owner: Option[String] = None, + created_time: Option[String] = None, + last_access: Option[String] = None, + created_by: Option[String] = None, + `type`: Option[String] = None, + provider: Option[String] = None, + bucket_columns: Option[List[String]] = Some(Nil), + sort_columns: Option[List[String]] = Some(Nil), + comment: Option[String] = None, + table_properties: Option[Map[String, String]] = None, + location: Option[String] = None, + serde_library: Option[String] = None, + inputformat: Option[String] = None, + outputformat: Option[String] = None, + storage_properties: 
Option[Map[String, String]] = None, + partition_provider: Option[String] = None, + partition_columns: Option[List[String]] = Some(Nil), + partition_values: Option[Map[String, String]] = None, + view_text: Option[String] = None, + view_original_text: Option[String] = None, + view_schema_mode: Option[String] = None, + view_catalog_and_namespace: Option[String] = None, + view_query_output_columns: Option[List[String]] = None + ) + +/** Used for columns field of DescribeTableJson */ +case class TableColumn( + name: String, + `type`: Type, + element_nullable: Boolean = true, + comment: Option[String] = None, + default: Option[String] = None +) + +case class Type( + name: String, + fields: Option[List[Field]] = None, + `type`: Option[Type] = None, + element_type: Option[Type] = None, + key_type: Option[Type] = None, + value_type: Option[Type] = None, + comment: Option[String] = None, + default: Option[String] = None, + element_nullable: Option[Boolean] = Some(true), + value_nullable: Option[Boolean] = Some(true), + nullable: Option[Boolean] = Some(true) +) + +case class Field( + name: String, + `type`: Type, + element_nullable: Boolean = true, + comment: Option[String] = None, + default: Option[String] = None +) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala index da0ddd3a156f7..254eda69e86e8 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala @@ -214,7 +214,7 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer { val sessionHandle = client.openSession(user, "") val infoValue = client.getInfo(sessionHandle, GetInfoType.CLI_ODBC_KEYWORDS) // scalastyle:off line.size.limit - assert(infoValue.getStringValue == 
"ADD,AFTER,AGGREGATE,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,AT,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALL,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHAR,CHARACTER,CHECK,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONSTRAINT,CONTAINS,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,DATA,DATABASE,DATABASES,DATE,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAY,DAYOFYEAR,DAYS,DBPROPERTIES,DEC,DECIMAL,DECLARE,DEFAULT,DEFINED,DEFINER,DELETE,DELIMITED,DESC,DESCRIBE,DETERMINISTIC,DFS,DIRECTORIES,DIRECTORY,DISTINCT,DISTRIBUTE,DIV,DO,DOUBLE,DROP,ELSE,END,ESCAPE,ESCAPED,EVOLUTION,EXCEPT,EXCHANGE,EXCLUDE,EXECUTE,EXISTS,EXPLAIN,EXPORT,EXTEND,EXTENDED,EXTERNAL,EXTRACT,FALSE,FETCH,FIELDS,FILEFORMAT,FILTER,FIRST,FLOAT,FOLLOWING,FOR,FOREIGN,FORMAT,FORMATTED,FROM,FULL,FUNCTION,FUNCTIONS,GENERATED,GLOBAL,GRANT,GROUP,GROUPING,HAVING,HOUR,HOURS,IDENTIFIER,IDENTITY,IF,IGNORE,ILIKE,IMMEDIATE,IMPORT,IN,INCLUDE,INCREMENT,INDEX,INDEXES,INNER,INPATH,INPUT,INPUTFORMAT,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,INVOKER,IS,ITEMS,ITERATE,JOIN,KEYS,LANGUAGE,LAST,LATERAL,LAZY,LEADING,LEAVE,LEFT,LIKE,LIMIT,LINES,LIST,LOAD,LOCAL,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MERGE,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTE,MINUTES,MODIFIES,MONTH,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NATURAL,NO,NONE,NOT,NULL,NULLS,NUMERIC,OF,OFFSET,ON,ONLY,OPTION,OPTIONS,OR,ORDER,OUT,OUTER,OUTPUTFORMAT,OVER,OVERLAPS,OVERLAY,OVERWRITE,PARTITION,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,POSITION,PRECEDING,PRIMARY,PRINCIPALS,PROPERTIES,PURGE,QUARTER,QUERY,RANGE,READS,REAL,RECORDREADER,RECORDWRITER,RECOVER,RECURSIVE,REDUCE,REFERENCES,REFRESH,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,RETURN,RETURNS,REVOKE,RIGHT,ROLE,ROLES,ROLLBACK,ROLLUP,ROW,ROWS,SCHEMA,SCHEMAS,SECOND,SECONDS,SECURITY,SELECT,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SESSION_USER,SET,SETS,SHORT,SHOW,SINGLE,SKEWED,SMALLINT,SOME,SORT,SORTED,SOURCE,SPECIFIC,SQL,START,STATISTICS,STORED,STRATIFY,STRING,STRUCT,SUBSTR,SUBSTRING,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TARGET,TBLPROPERTIES,TERMINATED,THEN,TIME,TIMEDIFF,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONS,TRANSFORM,TRIM,TRUE,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNION,UNIQUE,UNKNOWN,UNLOCK,UNPIVOT,UNSET,UNTIL,UPDATE,USE,USER,USING,VALUES,VAR,VARCHAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WEEK,WEEKS,WHEN,WHERE,WHILE,WINDOW,WITH,WITHIN,X,YEAR,YEARS,ZONE") + assert(infoValue.getStringValue == 
"ADD,AFTER,AGGREGATE,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,AT,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALL,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHAR,CHARACTER,CHECK,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONSTRAINT,CONTAINS,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,DATA,DATABASE,DATABASES,DATE,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAY,DAYOFYEAR,DAYS,DBPROPERTIES,DEC,DECIMAL,DECLARE,DEFAULT,DEFINED,DEFINER,DELETE,DELIMITED,DESC,DESCRIBE,DETERMINISTIC,DFS,DIRECTORIES,DIRECTORY,DISTINCT,DISTRIBUTE,DIV,DO,DOUBLE,DROP,ELSE,END,ESCAPE,ESCAPED,EVOLUTION,EXCEPT,EXCHANGE,EXCLUDE,EXECUTE,EXISTS,EXPLAIN,EXPORT,EXTEND,EXTENDED,EXTERNAL,EXTRACT,FALSE,FETCH,FIELDS,FILEFORMAT,FILTER,FIRST,FLOAT,FOLLOWING,FOR,FOREIGN,FORMAT,FORMATTED,FROM,FULL,FUNCTION,FUNCTIONS,GENERATED,GLOBAL,GRANT,GROUP,GROUPING,HAVING,HOUR,HOURS,IDENTIFIER,IDENTITY,IF,IGNORE,ILIKE,IMMEDIATE,IMPORT,IN,INCLUDE,INCREMENT,INDEX,INDEXES,INNER,INPATH,INPUT,INPUTFORMAT,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,INVOKER,IS,ITEMS,ITERATE,JOIN,JSON,KEYS,LANGUAGE,LAST,LATERAL,LAZY,LEADING,LEAVE,LEFT,LIKE,LIMIT,LINES,LIST,LOAD,LOCAL,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MERGE,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTE,MINUTES,MODIFIES,MONTH,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NATURAL,NO,NONE,NOT,NULL,NULLS,NUMERIC,OF,OFFSET,ON,ONLY,OPTION,OPTIONS,OR,ORDER,OUT,OUTER,OUTPUTFORMAT,OVER,OVERLAPS,OVERLAY,OVERWRITE,PARTITION,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,POSITION,PRECEDING,PRIMARY,PRINCIPALS,PROPERTIES,PURGE,QUARTER,QUERY,RANGE,READS,REAL,RECORDREADER,RECORDWRITER,RECOVER,RECURSIVE,REDUCE,REFERENCES,REFRESH,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,RETURN,RETURNS,REVOKE,RIGHT,ROLE,ROLES,ROLLBACK,ROLLUP,ROW,ROWS,SCHEMA,SCHEMAS,SECOND,SECONDS,SECURITY,SELECT,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SESSION_USER,SET,SETS,SHORT,SHOW,SINGLE,SKEWED,SMALLINT,SOME,SORT,SORTED,SOURCE,SPECIFIC,SQL,START,STATISTICS,STORED,STRATIFY,STRING,STRUCT,SUBSTR,SUBSTRING,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TARGET,TBLPROPERTIES,TERMINATED,THEN,TIME,TIMEDIFF,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONS,TRANSFORM,TRIM,TRUE,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNION,UNIQUE,UNKNOWN,UNLOCK,UNPIVOT,UNSET,UNTIL,UPDATE,USE,USER,USING,VALUES,VAR,VARCHAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WEEK,WEEKS,WHEN,WHERE,WHILE,WINDOW,WITH,WITHIN,X,YEAR,YEARS,ZONE") // scalastyle:on line.size.limit } }