Skip to content

Commit ea094d8

Browse files
lswyyy, tdas
authored and committed Feb 26, 2020
[DELTA-OSS-EXTERNAL] Generate does not update manifest if delete all data from unpartitioned tables
fixes #275
Closes #277
Closes #7931 from tdas/1u0hfort.

Lead-authored-by: Tathagata Das <tathagata.das1565@gmail.com>
Co-authored-by: Tathagata Das <tdas@databricks.com>
Co-authored-by: lswyyy <228930204@qq.com>
Signed-off-by: Tathagata Das <tathagata.das1565@gmail.com>
GitOrigin-RevId: fa35d0ea76e2973e84134e34e901be50d17e0f39
1 parent 21d2af1 commit ea094d8

File tree

2 files changed

+27
-3
lines changed

2 files changed

+27
-3
lines changed
 

‎src/main/scala/org/apache/spark/sql/delta/hooks/GenerateSymlinkManifest.scala

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -244,9 +244,13 @@ trait GenerateSymlinkManifestImpl extends PostCommitHook with DeltaLogging with
244244
}
245245

246246
val newManifestPartitionRelativePaths =
247-
withRelativePartitionDir(spark, partitionCols, fileNamesForManifest)
248-
.select("relativePartitionDir", "path").as[(String, String)]
249-
.groupByKey(_._1).mapGroups {
247+
if (fileNamesForManifest.isEmpty && partitionCols.isEmpty) {
248+
writeSingleManifestFile(manifestRootDirPath, Iterator())
249+
Set.empty[String]
250+
} else {
251+
withRelativePartitionDir(spark, partitionCols, fileNamesForManifest)
252+
.select("relativePartitionDir", "path").as[(String, String)]
253+
.groupByKey(_._1).mapGroups {
250254
(relativePartitionDir: String, relativeDataFilePath: Iterator[(String, String)]) =>
251255
val manifestPartitionDirAbsPath = {
252256
if (relativePartitionDir == null || relativePartitionDir.isEmpty) manifestRootDirPath
@@ -255,6 +259,7 @@ trait GenerateSymlinkManifestImpl extends PostCommitHook with DeltaLogging with
255259
writeSingleManifestFile(manifestPartitionDirAbsPath, relativeDataFilePath.map(_._2))
256260
relativePartitionDir
257261
}.collect().toSet
262+
}
258263

259264
logInfo(s"Generated manifest partitions for $deltaLogDataPath " +
260265
s"[${newManifestPartitionRelativePaths.size}]:\n\t" +

‎src/test/scala/org/apache/spark/sql/delta/DeltaGenerateSymlinkManifestSuite.scala

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,15 @@ trait DeltaGenerateSymlinkManifestSuiteBase extends QueryTest
6666
generateSymlinkManifest(tablePath.toString)
6767
assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 1)
6868
assert(spark.read.format("delta").load(tablePath.toString).count() == 0)
69+
70+
// delete all data
71+
write(5)
72+
assertManifest(tablePath, expectSameFiles = false, expectedNumFiles = 1)
73+
val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tablePath.toString)
74+
deltaTable.delete()
75+
generateSymlinkManifest(tablePath.toString)
76+
assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 0)
77+
assert(spark.read.format("delta").load(tablePath.toString).count() == 0)
6978
}
7079
}
7180

@@ -114,6 +123,16 @@ trait DeltaGenerateSymlinkManifestSuiteBase extends QueryTest
114123
generateSymlinkManifest(tablePath.toString)
115124
assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 0)
116125
assert(spark.read.format("delta").load(tablePath.toString).count() == 0)
126+
127+
// delete all data
128+
write(5, 5, 5)
129+
generateSymlinkManifest(tablePath.toString)
130+
assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 25)
131+
val deltaTable = io.delta.tables.DeltaTable.forPath(spark, tablePath.toString)
132+
deltaTable.delete()
133+
generateSymlinkManifest(tablePath.toString)
134+
assertManifest(tablePath, expectSameFiles = true, expectedNumFiles = 0)
135+
assert(spark.read.format("delta").load(tablePath.toString).count() == 0)
117136
}
118137
}
119138

0 commit comments

Comments
 (0)
Please sign in to comment.