diff --git a/csharp/App/Backend/DeleteOldData/DeleteOldDataFromS3.cs b/csharp/App/Backend/DeleteOldData/DeleteOldDataFromS3.cs index 6eb53179b..8327d58f2 100644 --- a/csharp/App/Backend/DeleteOldData/DeleteOldDataFromS3.cs +++ b/csharp/App/Backend/DeleteOldData/DeleteOldDataFromS3.cs @@ -54,7 +54,7 @@ public static class DeleteOldDataFromS3 var bucket = s3Region.Bucket(installation.BucketName()); Console.WriteLine($"[S3Cleanup] Processing {installation.Name} (bucket: {bucket.Name})"); - var deleted = await DeleteObjectsBefore(bucket, cutoffKey); + var deleted = await DeleteObjectsBefore(bucket, cutoffTimestamp); Console.WriteLine($"[S3Cleanup] {installation.Name}: deleted {deleted} objects"); } catch (Exception ex) @@ -95,8 +95,11 @@ public static class DeleteOldDataFromS3 await foreach (var obj in bucket.ListObjects()) { - if (string.Compare(obj.Path, cutoffKey, StringComparison.Ordinal) >= 0) - break; + if (!TryGetChunkTimestamp(obj.Path, out var ts)) + continue; // skip aggregated DDMMYYYY.json and any non-chunk object + + if (ts >= cutoffTimestamp) + break; // chunks are listed in ascending order; everything after is newer hasOldData = true; if (sampleKeys.Count < 5) @@ -121,15 +124,33 @@ public static class DeleteOldDataFromS3 return string.Join("\n", results); } - private static async Task DeleteObjectsBefore(S3Bucket bucket, string cutoffKey) + // Raw 10-second data chunks are named "{unixSeconds}.json"/".csv" (10-digit timestamps). + // Device-generated aggregated files are named "DDMMYYYY.json" (max 8 digits -> < 1e9). + // Only timestamp chunks may be deleted; the threshold cleanly separates the two formats and + // avoids deleting aggregated daily files by lexicographic accident. + private const Int64 MinUnixTimestamp = 1_000_000_000; + + private static Boolean TryGetChunkTimestamp(String key, out Int64 timestamp) + { + timestamp = 0; + var file = key.Substring(key.LastIndexOf('/') + 1); + var dot = file.IndexOf('.'); + var stem = dot >= 0 ? file.Substring(0, dot) : file; + return Int64.TryParse(stem, out timestamp) && timestamp >= MinUnixTimestamp; + } + + private static async Task DeleteObjectsBefore(S3Bucket bucket, long cutoffTimestamp) { var totalDeleted = 0; var keysToDelete = new List(); await foreach (var obj in bucket.ListObjects()) { - if (string.Compare(obj.Path, cutoffKey, StringComparison.Ordinal) >= 0) - break; + if (!TryGetChunkTimestamp(obj.Path, out var ts)) + continue; // skip aggregated DDMMYYYY.json and any non-chunk object + + if (ts >= cutoffTimestamp) + break; // chunks are listed in ascending order; everything after is newer keysToDelete.Add(obj.Path);