From 2d76cbb233d67b04fa1a90b8f6aa9d159621f0e8 Mon Sep 17 00:00:00 2001 From: Thomas Perl Date: Wed, 29 Jan 2014 20:19:46 +0100 Subject: [PATCH] Fix lynx handling for relative URLs (fixes Debian bug 732112) --- lib/urlwatch/html2txt.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/urlwatch/html2txt.py b/lib/urlwatch/html2txt.py index 641924b..5b93b19 100644 --- a/lib/urlwatch/html2txt.py +++ b/lib/urlwatch/html2txt.py @@ -77,6 +77,11 @@ def html2text(data, method='lynx', utf8=False): if method == 'lynx': # Lynx translates relative links in the mode we use it to: # file://localhost/tmp/[RANDOM STRING]/[RELATIVE LINK] + + # Recent versions of lynx (seen in 2.8.8pre1-1) do not include the + # "localhost" in the file:// URLs; see Debian bug 732112 + stdout = re.sub(r'file:///tmp/[^/]*/', '', stdout) + # Use the following regular expression to remove the unnecessary # parts, so that [RANDOM STRING] (changing on each call) does not # expose itself as change on the website (it's a Lynx-related thing -- 2.11.4.GIT