From 2fcba49433886ef2e1237f1bd256aadcea494309 Mon Sep 17 00:00:00 2001 From: enwuenwu <108224417+enwuenwu@users.noreply.github.com> Date: Sun, 28 Jul 2024 20:11:48 +0000 Subject: [PATCH] [Mailman2Bridge] fix message separation and improve "From_ lines" disambiguation (#4156) * [Mailman2Bridge.php] enable PCRE_MULTILINE pattern modifier Enable PCRE_MULTILINE pattern modifier on mbox content parsing. Without it, parsing a monthly archive yields only a single message per archive. * [Mailman2Bridge.php] extend mbox "From_ lines" pattern Extend PCRE pattern matching individual "From_ lines" used to split single messages in mbox content. In addition to starting with 'From ', a matching line must now also end with a time and a four-digit year (hh:mm:ss yyyy). This makes the pattern slightly more robust against accidental matches when a line within the actual message body starts with 'From ', which Mailman 2 (Pipermail) may not be configured to disambiguate. * [Mailman2Bridge.php] remove trailing slash from URI constant --------- Co-authored-by: enwu <108224417+8279279374@users.noreply.github.com> --- bridges/Mailman2Bridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/Mailman2Bridge.php b/bridges/Mailman2Bridge.php index ad0d8110..6b620c03 100644 --- a/bridges/Mailman2Bridge.php +++ b/bridges/Mailman2Bridge.php @@ -3,7 +3,7 @@ class Mailman2Bridge extends BridgeAbstract { const NAME = 'Mailman2Bridge'; - const URI = 'https://list.org/'; + const URI = 'https://list.org'; const MAINTAINER = 'imagoiq'; const CACHE_TIMEOUT = 60 * 30; // 30m const DESCRIPTION = 'Fetch latest messages from Mailman 2 archive (Pipermail)'; @@ -68,7 +68,7 @@ class Mailman2Bridge extends BridgeAbstract throw new \Exception('Failed to gzdecode'); } } - $mboxParts = preg_split('/^From /', $data); + $mboxParts = preg_split('/^From\s.+\d{2}:\d{2}:\d{2}\s\d{4}$/m', $data); // Drop the first element which is always an empty string array_shift($mboxParts); $mboxMails = 
array_reverse($mboxParts);