Automatically delete S3 files that are more than one year old

This commit is contained in:
Yinyin Liu 2026-03-04 14:04:13 +01:00
parent f82190afc1
commit 4ac1bc78ab
4 changed files with 171 additions and 88 deletions

View File

@ -1735,6 +1735,17 @@ public class Controller : ControllerBase
"AlarmKnowledgeBaseChecked.cs"); "AlarmKnowledgeBaseChecked.cs");
} }
/// <summary>
/// Returns a textual preview of what the S3 cleanup would remove, without deleting anything.
/// </summary>
/// <param name="authToken">Session token; the request is rejected when it resolves to no user.</param>
/// <param name="installationId">Optional installation to restrict the preview to; forwarded to DryRun.</param>
/// <returns>200 with the dry-run report, or 401 when the token has no associated user.</returns>
[HttpGet(nameof(DryRunS3Cleanup))]
public async Task<ActionResult<String>> DryRunS3Cleanup(Token authToken, long? installationId = null)
{
    // Only the presence of a logged-in user is checked here.
    var session = Db.GetSession(authToken);
    if (session?.User == null)
    {
        return Unauthorized();
    }

    var report = await DeleteOldData.DeleteOldDataFromS3.DryRun(installationId);
    return Ok(report);
}
} }

View File

@ -1,98 +1,157 @@
using System.Diagnostics;
using InnovEnergy.App.Backend.Database; using InnovEnergy.App.Backend.Database;
using InnovEnergy.App.Backend.DataTypes; using InnovEnergy.App.Backend.DataTypes.Methods;
using InnovEnergy.Lib.Utils; using InnovEnergy.Lib.S3Utils;
using InnovEnergy.Lib.S3Utils.DataTypes;
namespace InnovEnergy.App.Backend.DeleteOldData; namespace InnovEnergy.App.Backend.DeleteOldData;
public class DeleteOldDataFromS3 public static class DeleteOldDataFromS3
{ {
private static Timer? _cleanupTimer;
// NOTE: this span is a side-by-side diff rendering. The left column is the removed
// s3cmd-based DeleteFrom(...); the right column is the new StartScheduler().
/// <summary>
/// Schedules the S3 cleanup: the first run is at the next 03:00 UTC, then it repeats every day.
/// The timer is stored in <c>_cleanupTimer</c>. The callback runs CleanupAllInstallations to
/// completion and writes any exception to stderr instead of letting it escape the callback.
/// </summary>
public static void DeleteFrom(Installation installation, int timestamps_to_delete) public static void StartScheduler()
{ {
// Next occurrence of 03:00 UTC: today if that time is still ahead, otherwise tomorrow.
var now = DateTime.UtcNow;
var next = new DateTime(now.Year, now.Month, now.Day, 3, 0, 0, DateTimeKind.Utc);
if (next <= now) next = next.AddDays(1);
string configPath = "/home/ubuntu/.s3cfg"; _cleanupTimer = new Timer(
string bucketPath = installation.Product == (int)ProductType.Salidomo _ =>
? $"s3://{installation.S3BucketId}-c0436b6a-d276-4cd8-9c44-1eae86cf5d0e/{timestamps_to_delete}*" {
: installation.Product == (int)ProductType.SodistoreGrid
? $"s3://{installation.S3BucketId}-5109c126-e141-43ab-8658-f3c44c838ae8/{timestamps_to_delete}*"
: $"s3://{installation.S3BucketId}-3e5b3069-214a-43ee-8d85-57d72000c19d/{timestamps_to_delete}*" ;
//Console.WriteLine($"Deleting old data from {bucketPath}");
Console.WriteLine("Deleting data for timestamp prefix: " + timestamps_to_delete);
try try
{ {
// NOTE(review): the new callback blocks its thread until the whole cleanup has finished.
// If a run ever took longer than the one-day period, the next tick could presumably
// overlap it — confirm whether a re-entrancy guard is needed.
ProcessStartInfo startInfo = new ProcessStartInfo CleanupAllInstallations().GetAwaiter().GetResult();
{
FileName = "s3cmd",
Arguments = $"--config {configPath} rm {bucketPath}",
RedirectStandardOutput = true,
RedirectStandardError = true,
UseShellExecute = false,
CreateNoWindow = true
};
using Process process = new Process { StartInfo = startInfo };
process.OutputDataReceived += (sender, e) =>
{
if (!string.IsNullOrEmpty(e.Data))
Console.WriteLine("[s3cmd] " + e.Data);
};
process.ErrorDataReceived += (sender, e) =>
{
if (!string.IsNullOrEmpty(e.Data))
Console.WriteLine("[s3cmd-ERR] " + e.Data);
};
process.Start();
process.BeginOutputReadLine();
process.BeginErrorReadLine();
process.WaitForExit();
} }
catch (Exception ex) catch (Exception ex)
{ {
Console.WriteLine("Exception occurred during deletion: " + ex.Message); Console.Error.WriteLine($"[S3Cleanup] Scheduler error: {ex.Message}");
} }
},
// Remaining Timer arguments: callback state (unused), delay until the first run, repeat interval.
null,
next - now,
TimeSpan.FromDays(1)
);
Console.WriteLine($"[S3Cleanup] Scheduled daily at 03:00 UTC, first run in {(next - now).TotalHours:F1}h");
} }
// NOTE: this span is a side-by-side diff rendering. The left column is the start of the
// removed DeleteOldData() loop; the right column is the new CleanupAllInstallations().
/// <summary>
/// Runs the cleanup for every installation in <c>Db.Installations</c>: builds the installation's
/// bucket handle and passes it to DeleteObjectsBefore together with the cutoff key.
/// A failure for one installation is written to stderr and does not stop the remaining ones.
/// </summary>
public static async Task DeleteOldData() private static async Task CleanupAllInstallations()
{ {
// Cutoff is the Unix-seconds timestamp of exactly one year ago, used as a string key.
while (true){ var cutoffTimestamp = DateTimeOffset.UtcNow.AddYears(-1).ToUnixTimeSeconds();
var cutoffKey = cutoffTimestamp.ToString();
var installations = Db.Installations.ToList(); var installations = Db.Installations.ToList();
foreach (var installation in installations){
Console.WriteLine("DELETE S3 DATA FOR INSTALLATION "+installation.Name);
long oneYearAgoTimestamp = DateTimeOffset.UtcNow.AddYears(-1).ToUnixTimeSeconds();
Console.WriteLine("delete data before "+oneYearAgoTimestamp); Console.WriteLine($"[S3Cleanup] Starting cleanup for {installations.Count} installations, cutoff: {cutoffKey}");
for (int lastDigit=4;lastDigit>=0; lastDigit--)
{
int timestamps_to_delete = int.Parse(oneYearAgoTimestamp.ToString().Substring(0, lastDigit+1));
timestamps_to_delete--;
Console.WriteLine(timestamps_to_delete);
while (true) foreach (var installation in installations)
{ {
if (timestamps_to_delete % 10 == 0) try
{ {
// Endpoint URL is composed from the installation's region and provider fields.
Console.WriteLine("delete " + timestamps_to_delete + "*"); var s3Region = new S3Region(
DeleteFrom(installation,timestamps_to_delete); $"https://{installation.S3Region}.{installation.S3Provider}",
ExoCmd.S3Credentials
);
var bucket = s3Region.Bucket(installation.BucketName());
Console.WriteLine($"[S3Cleanup] Processing {installation.Name} (bucket: {bucket.Name})");
var deleted = await DeleteObjectsBefore(bucket, cutoffKey);
Console.WriteLine($"[S3Cleanup] {installation.Name}: deleted {deleted} objects");
}
// Per-installation catch: log and continue with the next installation.
catch (Exception ex)
{
Console.Error.WriteLine($"[S3Cleanup] Failed for {installation.Name}: {ex.Message}");
}
}
Console.WriteLine("[S3Cleanup] Finished cleanup for all installations");
}
// NOTE: this span is a side-by-side diff rendering. Fragments of the removed DeleteOldData()
// loop appear in the left column of some lines; the new DryRun() is the right column.
/// <summary>
/// Preview of the cleanup that only lists objects: for each selected installation it reports
/// whether the bucket listing starts with keys that compare (ordinal) below the one-year
/// cutoff key, together with up to 5 sample keys.
/// </summary>
/// <param name="installationId">When set, only the installation with this Id is inspected; otherwise all installations.</param>
/// <returns>A newline-joined report. Errors are reported per installation in the text rather than thrown.</returns>
public static async Task<string> DryRun(long? installationId = null)
{
var cutoffTimestamp = DateTimeOffset.UtcNow.AddYears(-1).ToUnixTimeSeconds();
var cutoffKey = cutoffTimestamp.ToString();
var allInstallations = Db.Installations.ToList();
var installations = installationId.HasValue
? allInstallations.Where(i => i.Id == installationId.Value).ToList()
: allInstallations;
var results = new List<string>();
results.Add($"Cutoff: {cutoffKey} ({DateTimeOffset.FromUnixTimeSeconds(cutoffTimestamp):yyyy-MM-dd HH:mm:ss} UTC)");
results.Add($"Installations: {installations.Count} (of {allInstallations.Count} total)");
results.Add("");
foreach (var installation in installations)
{
try
{
var s3Region = new S3Region(
$"https://{installation.S3Region}.{installation.S3Provider}",
ExoCmd.S3Credentials
);
var bucket = s3Region.Bucket(installation.BucketName());
var sampleKeys = new List<string>();
var hasOldData = false;
// Stops at the first key that is not below the cutoff.
// NOTE(review): this relies on ListObjects yielding keys in ascending ordinal order — confirm.
await foreach (var obj in bucket.ListObjects())
{
if (string.Compare(obj.Path, cutoffKey, StringComparison.Ordinal) >= 0)
break; break;
}
Console.WriteLine("delete " + timestamps_to_delete + "*");
DeleteFrom(installation,timestamps_to_delete);
timestamps_to_delete--;
hasOldData = true;
if (sampleKeys.Count < 5)
sampleKeys.Add(obj.Path);
else
break; // only need a sample, not full count
} }
}
}
Console.WriteLine("FINISHED DELETING S3 DATA FOR ALL INSTALLATIONS\n");
await Task.Delay(TimeSpan.FromDays(1)); results.Add($"{installation.Name} (bucket: {bucket.Name})");
results.Add($" Has old data: {(hasOldData ? "YES" : "NO")}");
if (sampleKeys.Count > 0)
results.Add($" Sample keys: {string.Join(", ", sampleKeys)}");
results.Add("");
}
// A failing installation is recorded in the report and the loop continues.
catch (Exception ex)
{
results.Add($"{installation.Name}: ERROR - {ex.Message}");
results.Add("");
} }
} }
return string.Join("\n", results);
}
/// <summary>
/// Walks the bucket listing and deletes, in batches of 1000, every object whose path
/// compares (ordinal) below <paramref name="cutoffKey"/>. The walk stops at the first
/// path that is not below the cutoff.
/// </summary>
/// <param name="bucket">Bucket to clean.</param>
/// <param name="cutoffKey">Keys ordinally smaller than this value are deleted.</param>
/// <returns>Number of objects in batches whose delete call reported success.</returns>
private static async Task<int> DeleteObjectsBefore(S3Bucket bucket, string cutoffKey)
{
    const int batchSize = 1000;

    var deletedCount = 0;
    var pending = new List<string>();

    // Sends the currently pending keys; counts them on success, logs the batch on failure.
    async Task FlushPending()
    {
        var succeeded = await bucket.DeleteObjects(pending);
        if (succeeded)
            deletedCount += pending.Count;
        else
            Console.Error.WriteLine($"[S3Cleanup] Failed to delete batch of {pending.Count} objects from {bucket.Name}");
    }

    await foreach (var s3Object in bucket.ListObjects())
    {
        var isBeforeCutoff = string.CompareOrdinal(s3Object.Path, cutoffKey) < 0;
        if (!isBeforeCutoff)
            break;

        pending.Add(s3Object.Path);
        if (pending.Count < batchSize)
            continue;

        await FlushPending();
        pending.Clear();
    }

    // Remainder that did not fill a complete batch.
    if (pending.Count > 0)
        await FlushPending();

    return deletedCount;
}
} }

View File

@ -38,7 +38,7 @@ public static class Program
WebsocketManager.MonitorInstallationTable().SupressAwaitWarning(); WebsocketManager.MonitorInstallationTable().SupressAwaitWarning();
// Task.Run(() => DeleteOldDataFromS3.DeleteOldData()); DeleteOldDataFromS3.StartScheduler();
builder.Services.AddControllers(); builder.Services.AddControllers();
builder.Services.AddProblemDetails(setup => builder.Services.AddProblemDetails(setup =>

View File

@ -243,6 +243,19 @@ public static class S3
} }
} }
/// <summary>
/// Deletes the given keys from <paramref name="bucket"/> using S3 multi-object delete.
/// Keys are sent in requests of at most 1000 entries (the S3 DeleteObjects per-request limit),
/// so callers may pass lists of any size.
/// </summary>
/// <param name="bucket">Bucket that contains the objects.</param>
/// <param name="keys">Object keys to delete; an empty list is a no-op that returns true.</param>
/// <returns>
/// True when every request returned HTTP 200 and reported no per-key errors. False as soon as
/// one request fails; keys in later chunks are then not attempted.
/// </returns>
public static async Task<Boolean> DeleteObjects(this S3Bucket bucket, IReadOnlyList<String> keys)
{
    const Int32 maxKeysPerRequest = 1000;

    if (keys.Count == 0) return true;

    var client = bucket.Region.GetS3Client();

    for (var offset = 0; offset < keys.Count; offset += maxKeysPerRequest)
    {
        var chunk = keys
            .Skip(offset)
            .Take(maxKeysPerRequest)
            .Select(k => new KeyVersion { Key = k })
            .ToList();

        var response = await client.DeleteObjectsAsync(new DeleteObjectsRequest
        {
            BucketName = bucket.Name,
            Objects    = chunk
        });

        // A 200 response can still carry per-key failures. DeleteErrors may be null or
        // empty when everything succeeded, so the check is null-safe.
        // NOTE(review): some SDK versions presumably throw DeleteObjectsException instead — confirm.
        var hasKeyErrors = response.DeleteErrors?.Count > 0;

        if (response.HttpStatusCode != HttpStatusCode.OK || hasKeyErrors)
            return false;
    }

    return true;
}
public static async Task<Boolean> DeleteBucket(this S3Region region, String bucketName) public static async Task<Boolean> DeleteBucket(this S3Region region, String bucketName)
{ {
var request = new DeleteBucketRequest { BucketName = bucketName }; var request = new DeleteBucketRequest { BucketName = bucketName };