diff -urNad /tmp/bp-build/trac-spamfilter-svn.orig/tracspamfilter/filters/bayes.py /tmp/bp-build/trac-spamfilter-svn/tracspamfilter/filters/bayes.py
--- trac-spamfilter-svn.orig/tracspamfilter/filters/bayes.py	2011-03-22 10:47:07.905191151 +0000
+++ trac-spamfilter-svn/tracspamfilter/filters/bayes.py	2011-03-22 10:49:12.253401794 +0000
@@ -75,7 +75,7 @@
                       spam and 'spam' or 'ham')
 
         hammie = self._get_hammie()
-        hammie.train(content.encode('utf-8'), spam)
+        hammie.train(content.encode('utf-8', 'ignore'), spam)
         hammie.store()
 
     # Internal methods
@@ -103,7 +103,14 @@
         else: # new database
             self.nspam = self.nham = 0
 
+    def _sanitize(self, text):
+        """Return text as unicode, dropping invalid UTF-8 byte sequences"""
+        if isinstance(text, unicode):
+            return text
+        return text.decode('utf-8', 'ignore')
+
     def _get_row(self, word):
+        word = self._sanitize(word)
         cursor = self.db.cursor()
         cursor.execute("SELECT nspam,nham FROM spamfilter_bayes WHERE word=%s",
                        (word,))
@@ -114,6 +121,7 @@
         return {'nspam': row[0], 'nham': row[1]}
 
     def _set_row(self, word, nspam, nham):
+        word = self._sanitize(word)
         cursor = self.db.cursor()
         if self._has_key(word):
             cursor.execute("UPDATE spamfilter_bayes SET nspam=%s,nham=%s "
@@ -124,11 +132,13 @@
         self.db.commit()
 
     def _delete_row(self, word):
+        word = self._sanitize(word)
         cursor = self.db.cursor()
         cursor.execute("DELETE FROM spamfilter_bayes WHERE word=%s", (word,))
         self.db.commit()
 
     def _has_key(self, key):
+        key = self._sanitize(key)
         cursor = self.db.cursor()
         cursor.execute("SELECT COUNT(*) FROM spamfilter_bayes WHERE word=%s",
                        (key,))
@@ -136,9 +146,6 @@
 
     def _wordinfoget(self, word):
         # See http://mail.python.org/pipermail/spambayes-dev/2006-July/003684.html
-        if isinstance(word, unicode):
-            word = word.encode("utf-8")
-
         row = self._get_row(word)
         if row:
             item = self.WordInfoClass()
