1 files changed, 377 insertions, 0 deletions
diff --git a/libs/apprise/attachment/AttachHTTP.py b/libs/apprise/attachment/AttachHTTP.py
new file mode 100644
index 000000000..5a3af9467
--- /dev/null
+++ b/libs/apprise/attachment/AttachHTTP.py
@@ -0,0 +1,377 @@
+# -*- coding: utf-8 -*-
+# BSD 2-Clause License
+#
+# Apprise - Push Notification Library.
+# Copyright (c) 2024, Chris Caron <[email protected]>
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+import re
+import os
+import requests
+import threading
+from tempfile import NamedTemporaryFile
+from .AttachBase import AttachBase
+from ..common import ContentLocation
+from ..URLBase import PrivacyMode
+from ..AppriseLocale import gettext_lazy as _
+
+
+class AttachHTTP(AttachBase):
+    """
+    A wrapper for HTTP based attachment sources
+    """
+
+    # The default descriptive name associated with the service
+    service_name = _('Web Based')
+
+    # The default protocol
+    protocol = 'http'
+
+    # The default secure protocol
+    secure_protocol = 'https'
+
+    # The number of bytes in memory to read from the remote source at a time
+    chunk_size = 8192
+
+    # Web based requests are remote/external to our current location
+    location = ContentLocation.HOSTED
+
+    # thread safe loading
+    _lock = threading.Lock()
+
+    def __init__(self, headers=None, **kwargs):
+        """
+        Initialize HTTP Object
+
+        headers can be a dictionary of key/value pairs that you want to
+        additionally include as part of the server headers to post with
+
+        """
+        super().__init__(**kwargs)
+
+        self.schema = 'https' if self.secure else 'http'
+
+        self.fullpath = kwargs.get('fullpath')
+        if not isinstance(self.fullpath, str):
+            self.fullpath = '/'
+
+        self.headers = {}
+        if headers:
+            # Store our extra headers
+            self.headers.update(headers)
+
+        # Where our content is written to upon a call to download.
+        self._temp_file = None
+
+        # Our Query String Dictionary; we use this to track arguments
+        # specified that aren't otherwise part of this class
+        self.qsd = {k: v for k, v in kwargs.get('qsd', {}).items()
+                    if k not in self.template_args}
+
+        return
+
+    def download(self, **kwargs):
+        """
+        Perform retrieval of the configuration based on the specified request
+        """
+
+        if self.location == ContentLocation.INACCESSIBLE:
+            # our content is inaccessible
+            return False
+
+        # prepare header
+        headers = {
+            'User-Agent': self.app_id,
+        }
+
+        # Apply any/all header over-rides defined
+        headers.update(self.headers)
+
+        auth = None
+        if self.user:
+            auth = (self.user, self.password)
+
+        url = '%s://%s' % (self.schema, self.host)
+        if isinstance(self.port, int):
+            url += ':%d' % self.port
+
+        url += self.fullpath
+
+        # Where our request object will temporarily live.
+        r = None
+
+        # Always call throttle before any remote server i/o is made
+        self.throttle()
+
+        with self._lock:
+            if self.exists(retrieve_if_missing=False):
+                # Due to locking; it's possible a concurrent thread already
+                # handled the retrieval in which case we can safely move on
+                self.logger.trace(
+                    'HTTP Attachment %s already retrieved',
+                    self._temp_file.name)
+                return True
+
+            # Ensure any existing content set has been invalidated
+            self.invalidate()
+
+            self.logger.debug(
+                'HTTP Attachment Fetch URL: %s (cert_verify=%r)' % (
+                    url, self.verify_certificate))
+
+            try:
+                # Make our request
+                with requests.get(
+                        url,
+                        headers=headers,
+                        auth=auth,
+                        params=self.qsd,
+                        verify=self.verify_certificate,
+                        timeout=self.request_timeout,
+                        stream=True) as r:
+
+                    # Handle Errors
+                    r.raise_for_status()
+
+                    # Get our file-size (if known)
+                    try:
+                        file_size = int(r.headers.get('Content-Length', '0'))
+                    except (TypeError, ValueError):
+                        # Handle edge case where Content-Length is a bad value
+                        file_size = 0
+
+                    # Perform a little Q/A on file limitations and restrictions
+                    if self.max_file_size > 0 and \
+                            file_size > self.max_file_size:
+
+                        # The content retrieved is to large
+                        self.logger.error(
+                            'HTTP response exceeds allowable maximum file '
+                            'length ({}KB): {}'.format(
+                                int(self.max_file_size / 1024),
+                                self.url(privacy=True)))
+
+                        # Return False (signifying a failure)
+                        return False
+
+                    # Detect config format based on mime if the format isn't
+                    # already enforced
+                    self.detected_mimetype = r.headers.get('Content-Type')
+
+                    d = r.headers.get('Content-Disposition', '')
+                    result = re.search(
+                        "filename=['\"]?(?P<name>[^'\"]+)['\"]?", d, re.I)
+                    if result:
+                        self.detected_name = result.group('name').strip()
+
+                    # Create a temporary file to work with; delete must be set
+                    # to False or it isn't compatible with Microsoft Windows
+                    # instances. In lieu of this, __del__ will clean up the
+                    # file for us.
+                    self._temp_file = NamedTemporaryFile(delete=False)
+
+                    # Get our chunk size
+                    chunk_size = self.chunk_size
+
+                    # Track all bytes written to disk
+                    bytes_written = 0
+
+                    # If we get here, we can now safely write our content to
+                    # disk
+                    for chunk in r.iter_content(chunk_size=chunk_size):
+                        # filter out keep-alive chunks
+                        if chunk:
+                            self._temp_file.write(chunk)
+                            bytes_written = self._temp_file.tell()
+
+                            # Prevent a case where Content-Length isn't
+                            # provided. In this case we don't want to fetch
+                            # beyond our limits
+                            if self.max_file_size > 0:
+                                if bytes_written > self.max_file_size:
+                                    # The content retrieved is to large
+                                    self.logger.error(
+                                        'HTTP response exceeds allowable '
+                                        'maximum file length '
+                                        '({}KB): {}'.format(
+                                            int(self.max_file_size / 1024),
+                                            self.url(privacy=True)))
+
+                                    # Invalidate any variables previously set
+                                    self.invalidate()
+
+                                    # Return False (signifying a failure)
+                                    return False
+
+                                elif bytes_written + chunk_size \
+                                        > self.max_file_size:
+                                    # Adjust out next read to accomodate up to
+                                    # our limit +1. This will prevent us from
+                                    # reading to much into our memory buffer
+                                    self.max_file_size - bytes_written + 1
+
+                    # Ensure our content is flushed to disk for post-processing
+                    self._temp_file.flush()
+
+                    # Set our minimum requirements for a successful download()
+                    # call
+                    self.download_path = self._temp_file.name
+                    if not self.detected_name:
+                        self.detected_name = os.path.basename(self.fullpath)
+
+            except requests.RequestException as e:
+                self.logger.error(
+                    'A Connection error occurred retrieving HTTP '
+                    'configuration from %s.' % self.host)
+                self.logger.debug('Socket Exception: %s' % str(e))
+
+                # Invalidate any variables previously set
+                self.invalidate()
+
+                # Return False (signifying a failure)
+                return False
+
+            except (IOError, OSError):
+                # IOError is present for backwards compatibility with Python
+                # versions older then 3.3.  >= 3.3 throw OSError now.
+
+                # Could not open and/or write the temporary file
+                self.logger.error(
+                    'Could not write attachment to disk: {}'.format(
+                        self.url(privacy=True)))
+
+                # Invalidate any variables previously set
+                self.invalidate()
+
+                # Return False (signifying a failure)
+                return False
+
+        # Return our success
+        return True
+
+    def invalidate(self):
+        """
+        Close our temporary file
+        """
+        if self._temp_file:
+            self.logger.trace(
+                'Attachment cleanup of %s', self._temp_file.name)
+            self._temp_file.close()
+
+            try:
+                # Ensure our file is removed (if it exists)
+                os.unlink(self._temp_file.name)
+
+            except OSError:
+                pass
+
+            # Reset our temporary file to prevent from entering
+            # this block again
+            self._temp_file = None
+
+        super().invalidate()
+
+    def __del__(self):
+        """
+        Tidy memory if open
+        """
+        with self._lock:
+            self.invalidate()
+
+    def url(self, privacy=False, *args, **kwargs):
+        """
+        Returns the URL built dynamically based on specified arguments.
+        """
+
+        # Our URL parameters
+        params = self.url_parameters(privacy=privacy, *args, **kwargs)
+
+        # Prepare our cache value
+        if self.cache is not None:
+            if isinstance(self.cache, bool) or not self.cache:
+                cache = 'yes' if self.cache else 'no'
+            else:
+                cache = int(self.cache)
+
+            # Set our cache value
+            params['cache'] = cache
+
+        if self._mimetype:
+            # A format was enforced
+            params['mime'] = self._mimetype
+
+        if self._name:
+            # A name was enforced
+            params['name'] = self._name
+
+        # Append our headers into our parameters
+        params.update({'+{}'.format(k): v for k, v in self.headers.items()})
+
+        # Apply any remaining entries to our URL
+        params.update(self.qsd)
+
+        # Determine Authentication
+        auth = ''
+        if self.user and self.password:
+            auth = '{user}:{password}@'.format(
+                user=self.quote(self.user, safe=''),
+                password=self.pprint(
+                    self.password, privacy, mode=PrivacyMode.Secret, safe=''),
+            )
+        elif self.user:
+            auth = '{user}@'.format(
+                user=self.quote(self.user, safe=''),
+            )
+
+        default_port = 443 if self.secure else 80
+
+        return '{schema}://{auth}{hostname}{port}{fullpath}?{params}'.format(
+            schema=self.secure_protocol if self.secure else self.protocol,
+            auth=auth,
+            hostname=self.quote(self.host, safe=''),
+            port='' if self.port is None or self.port == default_port
+                 else ':{}'.format(self.port),
+            fullpath=self.quote(self.fullpath, safe='/'),
+            params=self.urlencode(params),
+        )
+
+    @staticmethod
+    def parse_url(url):
+        """
+        Parses the URL and returns enough arguments that can allow
+        us to re-instantiate this object.
+
+        """
+        results = AttachBase.parse_url(url)
+
+        if not results:
+            # We're done early as we couldn't load the results
+            return results
+
+        # Add our headers that the user can potentially over-ride if they wish
+        # to to our returned result set
+        results['headers'] = results['qsd-']
+        results['headers'].update(results['qsd+'])
+
+        return results