Got from https://sourceforge.net/tracker/index.php?func=detail&aid=891491&group_id=103&atid=300103
[ 891491 ] Scrubber.py patch 	(file 77903: scrubber.patch	2004-02-25)

You can apply this patch to an already-installed Mailman system, but in that
case apply the Defaults.py.in hunk to the installed Defaults.py file instead.


Index: mailman/Mailman/Defaults.py.in
===================================================================
RCS file: /cvsroot/mailman/mailman/Mailman/Defaults.py.in,v
retrieving revision 2.112.2.14
diff -u -r2.112.2.14 Defaults.py.in
--- mailman/Mailman/Defaults.py.in	22 Feb 2004 22:20:51 -0000	2.112.2.14
+++ mailman/Mailman/Defaults.py.in	25 Feb 2004 08:14:37 -0000
@@ -256,6 +256,11 @@
 # should modify the Message object as necessary.
 ARCHIVE_SCRUBBER = 'Mailman.Handlers.Scrubber'
 
+# Mailman.Handlers.Scrubber uses the attachment's filename as is.
+# If you don't want this (e.g. extremely long MIME-encoded filenames), set
+# this to True.
+SCRUBBER_DONT_USE_ATTACHMENT_FILENAME = False
+
 # This variable defines what happens to text/html subparts.  They can be
 # stripped completely, escaped, or filtered through an external program.  The
 # legal values are:
Index: mailman/Mailman/Handlers/Scrubber.py
===================================================================
RCS file: /cvsroot/mailman/mailman/Mailman/Handlers/Scrubber.py,v
retrieving revision 2.18.2.6
diff -u -r2.18.2.6 Scrubber.py
--- mailman/Mailman/Handlers/Scrubber.py	1 Dec 2003 01:43:18 -0000	2.18.2.6
+++ mailman/Mailman/Handlers/Scrubber.py	25 Feb 2004 08:14:37 -0000
@@ -27,7 +27,7 @@
 import binascii
 import tempfile
 from cStringIO import StringIO
-from types import IntType
+from types import IntType, StringType
 
 from email.Utils import parsedate
 from email.Parser import HeaderParser
@@ -180,11 +180,29 @@
             # message.
             if charset is None:
                 charset = part.get_content_charset(lcset)
+            # TK: if part is attached then check charset and scrub if none
+            if part.get('content-disposition') and \
+               not part.get_content_charset():
+                omask = os.umask(002)
+                try:
+                    url = save_attachment(mlist, part, dir)
+                finally:
+                    os.umask(omask)
+                filename = part.get_filename(_('not available'))
+                filename = Utils.oneline(filename, lcset)
+                del part['content-type']
+                del part['content-transfer-encoding']
+                part.set_payload(_("""\
+An embedded and charset-unspecified text was scrubbed...
+Name: %(filename)s
+Url: %(url)s
+"""), lcset)
         elif ctype == 'text/html' and isinstance(sanitize, IntType):
             if sanitize == 0:
                 if outer:
                     raise DiscardMessage
                 del part['content-type']
+                del part['content-transfer-encoding']
                 part.set_payload(_('HTML attachment scrubbed and removed'),
                                  # Adding charset arg and removing content-tpe
                                  # sets content-type to text/plain
@@ -202,6 +220,7 @@
                 finally:
                     os.umask(omask)
                 del part['content-type']
+                del part['content-transfer-encoding']
                 part.set_payload(_("""\
 An HTML attachment was scrubbed...
 URL: %(url)s
@@ -267,6 +286,7 @@
                 os.umask(omask)
             desc = part.get('content-description', _('not available'))
             filename = part.get_filename(_('not available'))
+            filename = Utils.oneline(filename, lcset)
             del part['content-type']
             del part['content-transfer-encoding']
             part.set_payload(_("""\
@@ -285,7 +305,7 @@
         # By default we take the charset of the first text/plain part in the
         # message, but if there was none, we'll use the list's preferred
         # language's charset.
-        if charset is None or charset == 'us-ascii':
+        if not charset or charset == 'us-ascii':
             charset = lcset
         # We now want to concatenate all the parts which have been scrubbed to
         # text/plain, into a single text/plain payload.  We need to make sure
@@ -294,17 +314,27 @@
         # BAW: Martin's original patch suggested we might want to try
         # generalizing to utf-8, and that's probably a good idea (eventually).
         text = []
-        for part in msg.get_payload():
+        for part in msg.walk():
+            if part.get_content_maintype() == 'multipart':
+                continue
             # All parts should be scrubbed to text/plain by now.
             partctype = part.get_content_type()
             if partctype <> 'text/plain':
-                text.append(_('Skipped content of type %(partctype)s'))
+                text.append(_('Skipped content of type %(partctype)s\n'))
                 continue
             try:
                 t = part.get_payload(decode=True)
             except binascii.Error:
                 t = part.get_payload()
-            partcharset = part.get_content_charset()
+            # TK: get_content_charset() returns 'iso-2022-jp' for an
+            # internally crafted (scrubbed) 'euc-jp' text part.  So first
+            # try get_charset(), and fall back to get_content_charset() for
+            # parts that were already embedded in the incoming message.
+            partcharset = part.get_charset()
+            if partcharset:
+                partcharset = str(partcharset)
+            else:
+                partcharset = part.get_content_charset()
             if partcharset and partcharset <> charset:
                 try:
                     t = unicode(t, partcharset, 'replace')
@@ -320,9 +350,10 @@
                 except (UnicodeError, LookupError, ValueError):
                     t = t.encode(lcset, 'replace')
             # Separation is useful
-            if not t.endswith('\n'):
-                t += '\n'
-            text.append(t)
+            if isinstance(t, StringType):
+                if not t.endswith('\n'):
+                    t += '\n'
+                text.append(t)
         # Now join the text and set the payload
         sep = _('-------------- next part --------------\n')
         del msg['content-type']
@@ -376,7 +407,7 @@
         # Now base the filename on what's in the attachment, uniquifying it if
         # necessary.
         filename = msg.get_filename()
-        if not filename:
+        if not filename or mm_cfg.SCRUBBER_DONT_USE_ATTACHMENT_FILENAME:
             filebase = 'attachment'
         else:
             # Sanitize the filename given in the message headers
