Fix accidental deletion of aggregated daily data

This commit is contained in:
Yinyin Liu 2026-06-18 15:32:11 +02:00
parent b0e3e47553
commit bf60286356
1 changed files with 27 additions and 6 deletions

View File

@ -54,7 +54,7 @@ public static class DeleteOldDataFromS3
var bucket = s3Region.Bucket(installation.BucketName());
Console.WriteLine($"[S3Cleanup] Processing {installation.Name} (bucket: {bucket.Name})");
var deleted = await DeleteObjectsBefore(bucket, cutoffKey);
var deleted = await DeleteObjectsBefore(bucket, cutoffTimestamp);
Console.WriteLine($"[S3Cleanup] {installation.Name}: deleted {deleted} objects");
}
catch (Exception ex)
@ -95,8 +95,11 @@ public static class DeleteOldDataFromS3
await foreach (var obj in bucket.ListObjects())
{
if (string.Compare(obj.Path, cutoffKey, StringComparison.Ordinal) >= 0)
break;
if (!TryGetChunkTimestamp(obj.Path, out var ts))
continue; // skip aggregated DDMMYYYY.json and any non-chunk object
if (ts >= cutoffTimestamp)
break; // chunks are listed in ascending order; everything after is newer
hasOldData = true;
if (sampleKeys.Count < 5)
@ -121,15 +124,33 @@ public static class DeleteOldDataFromS3
return string.Join("\n", results);
}
private static async Task<int> DeleteObjectsBefore(S3Bucket bucket, string cutoffKey)
// Raw 10-second data chunks are named "{unixSeconds}.json"/".csv" (10-digit timestamps).
// Device-generated aggregated files are named "DDMMYYYY.json" (max 8 digits -> < 1e9).
// Only timestamp chunks may be deleted; the threshold cleanly separates the two formats and
// avoids deleting aggregated daily files by lexicographic accident.
private const Int64 MinUnixTimestamp = 1_000_000_000;

// Extracts the Unix timestamp encoded in the file name of an S3 object key.
//
// key:       full object key; only the part after the last '/' is inspected, and within
//            that only the text before the first '.' (the "stem").
// timestamp: the parsed stem when the method returns true; always 0 when it returns false.
// returns:   true only if the stem consists solely of ASCII digits and its value is
//            >= MinUnixTimestamp; false for everything else (null/empty keys, aggregated
//            DDMMYYYY files, names with signs/whitespace/letters, overflowing numbers).
private static Boolean TryGetChunkTimestamp(String key, out Int64 timestamp)
{
    timestamp = 0;

    // A missing key can never identify a chunk; report "not a chunk" instead of throwing.
    if (String.IsNullOrEmpty(key))
        return false;

    var file = key.Substring(key.LastIndexOf('/') + 1);
    var dot  = file.IndexOf('.');
    var stem = dot >= 0 ? file.Substring(0, dot) : file;

    // NumberStyles.None accepts digits only (no sign, no surrounding whitespace) and the
    // invariant culture keeps the result independent of the host's regional settings.
    // This method gates deletion, so anything that is not an exact "{digits}" stem is rejected.
    var isNumber = Int64.TryParse(
        stem,
        System.Globalization.NumberStyles.None,
        System.Globalization.CultureInfo.InvariantCulture,
        out var parsed);

    if (!isNumber || parsed < MinUnixTimestamp)
        return false; // timestamp stays 0 so callers never see a rejected value

    timestamp = parsed;
    return true;
}
private static async Task<int> DeleteObjectsBefore(S3Bucket bucket, long cutoffTimestamp)
{
var totalDeleted = 0;
var keysToDelete = new List<string>();
await foreach (var obj in bucket.ListObjects())
{
if (string.Compare(obj.Path, cutoffKey, StringComparison.Ordinal) >= 0)
break;
if (!TryGetChunkTimestamp(obj.Path, out var ts))
continue; // skip aggregated DDMMYYYY.json and any non-chunk object
if (ts >= cutoffTimestamp)
break; // chunks are listed in ascending order; everything after is newer
keysToDelete.Add(obj.Path);