@@ -56,6 +56,10 @@ func ReadPropertyKeys(filePath string) ([]string, error) {
5656 return mapKeys , err
5757}
5858
59+ // We use renameio to ensure atomic writes. This prevents data corruption (partial writes)
60+ // in case of a system crash or power loss during the save operation.
61+ // NOTE: This mechanism changes the file's Inode on every write, which is why we cannot
62+ // use the data file itself for file locking (flock).
5963func UpsertProperty (filePath string , key string , value []byte ) error {
6064 if err := validateKey (key ); err != nil {
6165 return fmt .Errorf ("%w: %w" , ErrInvalidKey , err )
@@ -184,24 +188,24 @@ func emptyUnlockFunc() error {
184188 return nil
185189}
186190
191+ // getLock attempts to acquire a file lock.
192+ // We explicitly do NOT remove the lock file in case of timeout or failure, nor after a successful unlock.
193+ // Removing the lock file would introduce a "TOCTOU" (Time-of-check to Time-of-use) race condition:
194+ // 1. Process A holds the lock on Inode X.
195+ // 2. Process B times out and deletes the file (removing the directory entry for Inode X).
196+ // 3. Process C creates a NEW lock file (Inode Y) and acquires the lock.
197+ // Result: Process A and Process C would both hold valid locks on different Inodes, leading to data corruption.
198+ // Therefore, we leave the file on disk; it acts as a persistent anchor for synchronization.
187199func getLock (flock * flock.Flock , lockFn lockFunc , errorMsg string ) (UnlockFunc , error ) {
188200 ctx , cancel := context .WithTimeout (context .Background (), 3 * time .Second )
189201 defer cancel ()
190202
191203 locked , err := lockFn (ctx , 100 * time .Millisecond )
192204 if err != nil {
193205 if errors .Is (err , context .DeadlineExceeded ) {
194- if err := flock .Unlock (); err != nil {
195- slog .Error ("failed to unlock file lock" , "path" , flock .Path (), "error" , err )
196- }
197- if err := os .Remove (flock .Path ()); err != nil {
198- slog .Error ("failed to delete lock file" , "path" , flock .Path (), "error" , err )
199- }
200- locked = false
201- slog .Warn ("lock file removed due to timeout" , "path" , flock .Path ())
202- } else {
203- return emptyUnlockFunc , fmt .Errorf ("failed trying to acquire %s for %s: %w" , errorMsg , flock .Path (), err )
206+ return emptyUnlockFunc , fmt .Errorf ("timeout acquiring lock for %s" , flock .Path ())
204207 }
208+ return emptyUnlockFunc , fmt .Errorf ("failed trying to acquire %s for %s: %w" , errorMsg , flock .Path (), err )
205209 }
206210 if ! locked {
207211 return emptyUnlockFunc , fmt .Errorf ("unable to acquire %s for %s" , errorMsg , flock .Path ())
@@ -225,6 +229,10 @@ func getReadLock(filePath string) (UnlockFunc, error) {
225229 return getLock (fileLock , fileLock .TryRLockContext , "read lock" )
226230}
227231
// getLockFilePath derives the sidecar lock-file path for a data file by
// appending ".lock" to path (e.g. "data.json" -> "data.json.lock").
//
// Locking goes through this separate file rather than the data file itself:
// the data file is saved atomically via renameio, so its inode changes on
// every save, whereas the sidecar keeps the same inode and can serve as a
// stable, persistent anchor for the lock.
func getLockFilePath(path string) string {
	const lockSuffix = ".lock"
	return path + lockSuffix
}
0 commit comments