Fix up handling of email cache

We were checkpointing after each email was sent to ensure that an aborted
p-m run didn't result in double emails; however, because the new cache only
contains records for packages we've seen so far during this run (to avoid
the cache growing without bounds over time), that means an aborted p-m run
*still* throws away records for all packages still waiting to be processed.

To fix this, we:
 - only checkpoint records of writing emails during this in-progress run to
   a temp file
 - check for this temp file on britney startup, and if present, merge the
   results into the current state
 - move this temp file into the final cache location only at the end of the
   britney run

Along the way, fix a bug introduced in the previous commit that caused us to
save state only for those packages for which we sent email during the
current run, which would have had quite bad effects.
sru-regression-messages
Steve Langasek 8 years ago
parent f680bb3df8
commit d5040afb84

@ -103,6 +103,14 @@ class EmailPolicy(BasePolicy, Rest):
with open(self.filename, encoding='utf-8') as data: with open(self.filename, encoding='utf-8') as data:
self.cache = json.load(data) self.cache = json.load(data)
self.log("Loaded cached email data from %s" % self.filename) self.log("Loaded cached email data from %s" % self.filename)
tmp = self.filename + '.new'
if os.path.exists(tmp):
# if we find a record on disk of emails sent from an incomplete
# britney run, merge them in now.
with open(tmp, encoding='utf-8') as data:
self.cache.update(json.load(data))
self._save_progress(self.cache)
self.save_state()
def _scrape_gpg_emails(self, person): def _scrape_gpg_emails(self, person):
"""Find email addresses from one person's GPG keys.""" """Find email addresses from one person's GPG keys."""
@ -193,6 +201,7 @@ class EmailPolicy(BasePolicy, Rest):
except TypeError: except TypeError:
# This exception happens when source_name, version never seen before # This exception happens when source_name, version never seen before
emails = [] emails = []
last_sent = 0
next_due = max_age next_due = max_age
if self.dry_run: if self.dry_run:
self.log("[email dry run] Age %d >= threshold %d: would email: %s" % self.log("[email dry run] Age %d >= threshold %d: would email: %s" %
@ -212,18 +221,28 @@ class EmailPolicy(BasePolicy, Rest):
server.sendmail('noreply@canonical.com', emails, msg) server.sendmail('noreply@canonical.com', emails, msg)
server.quit() server.quit()
# record the age at which the mail should have been sent # record the age at which the mail should have been sent
self.emails_by_pkg[source_name][version] = (emails, next_due) last_sent = next_due
self.save_state()
except socket.error as err: except socket.error as err:
self.log("Failed to send mail! Is SMTP server running?") self.log("Failed to send mail! Is SMTP server running?")
self.log(err) self.log(err)
self.emails_by_pkg[source_name][version] = (emails, last_sent)
self._save_progress(self.emails_by_pkg)
return PolicyVerdict.PASS return PolicyVerdict.PASS
def _save_progress(self, my_data):
    """Checkpoint email state for this in-progress run to a temp file.

    Writes *my_data* as JSON to ``<filename>.new`` rather than to the
    final cache location, so an aborted run does not clobber the last
    complete cache.  Returns the temp file's path.
    """
    tmp = self.filename + '.new'
    with open(tmp, 'w', encoding='utf-8') as data:
        json.dump(my_data, data)
    return tmp

def save_state(self, britney=None):
    """Save email notification status of all pending packages.

    Promotes the in-progress checkpoint file (``<filename>.new``) to the
    final cache location.  Meant to run only at the end of a complete
    britney run; does nothing in dry-run mode.
    """
    if not self.dry_run:
        try:
            os.rename(self.filename + '.new', self.filename)
        # if we haven't written any cache, don't clobber the old one
        except FileNotFoundError:
            pass
    if britney:
        self.log("Wrote email data to %s" % self.filename)

Loading…
Cancel
Save