import socket
from ipaddress import IPv4Address, IPv6Address, ip_address, ip_network
from typing import Union
from urllib.parse import urlparse


class UrlValidator:
    """Reject URLs that are malformed or that point at non-public addresses.

    The policy is expressed through the class-level constants below, so a
    subclass can tighten or relax it by overriding them. An empty
    port/scheme/host collection disables the corresponding check.
    """

    # Ports a URL may name explicitly (a URL without a port is accepted).
    DEFAULT_PORT_WHITELIST = frozenset({80, 81, 8080, 443, 8443, 8000})
    # URL schemes that are accepted.
    DEFAULT_SCHEME_WHITELIST = frozenset({'http', 'https'})
    # Hostnames / addresses that are always rejected.
    DEFAULT_HOST_BLACKLIST = frozenset({
        '192.0.0.192',
        '169.254.169.254',
        '100.100.100.200',
        'metadata.packet.net',
        'metadata.google.internal',
    })
    # Every character of the URL must be a member of this set.
    DEFAULT_CHARACTER_WHITELIST = frozenset(
        'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:/-_.?&='
    )

    @staticmethod
    def is_internal_address(ip: Union[IPv4Address, IPv6Address]) -> bool:
        """Return True if *ip* is private, unspecified, reserved, loopback,
        multicast or link-local."""
        return (
            ip.is_private
            or ip.is_unspecified
            or ip.is_reserved
            or ip.is_loopback
            or ip.is_multicast
            or ip.is_link_local
        )

    @classmethod
    def validate(cls, url: str) -> bool:
        """Return True only if *url* passes every check of the policy.

        Checks, in order: the value is a string made solely of whitelisted
        characters; it parses as a URL; its scheme, explicit port and
        hostname satisfy the white/blacklists; its host resolves (via
        ``socket.gethostbyname``) to an address that is neither blacklisted
        nor internal/non-global.

        Any failure (including a failed DNS lookup) yields False; the method
        does not raise for bad input.

        NOTE(review): only the single address returned by ``gethostbyname``
        is inspected, and a later fetch of the URL will presumably resolve
        the name again, so callers should confirm this is sufficient for
        their threat model.
        """
        # Non-string input (None, bytes, int, ...) can never be a valid URL.
        if not isinstance(url, str):
            return False

        allowed_chars = cls.DEFAULT_CHARACTER_WHITELIST
        if any(ch not in allowed_chars for ch in url):
            return False

        # Parse once; `.port` raises ValueError for malformed or out-of-range
        # ports, so it has to be read inside the try block.
        try:
            parsed = urlparse(url)
            host = parsed.hostname
            port = parsed.port
            scheme = parsed.scheme
        except ValueError:
            return False

        scheme_whitelist = cls.DEFAULT_SCHEME_WHITELIST
        host_blacklist = cls.DEFAULT_HOST_BLACKLIST
        port_whitelist = cls.DEFAULT_PORT_WHITELIST

        # Cheap syntactic rejections first, so no DNS lookup is made for URLs
        # that would be refused anyway.
        if scheme_whitelist and scheme not in scheme_whitelist:
            return False
        if host_blacklist and host is not None and host in host_blacklist:
            return False
        if port_whitelist and port is not None and port not in port_whitelist:
            return False

        # A bare IP literal is taken as-is; anything else must carry a host
        # that resolves.
        try:
            ip = ip_address(url)
        except ValueError:
            if host is None:
                return False
            try:
                ip = ip_address(socket.gethostbyname(host))
            except Exception:
                # Fail closed: an unresolvable or unencodable host is invalid.
                return False

        # Blacklisted addresses must also be refused when reached through a
        # DNS name rather than written literally in the URL.
        if host_blacklist and str(ip) in host_blacklist:
            return False

        # CGNAT IPs do not set `is_private`, so `is_global` is checked too.
        if ip.version == 4 and (ip.is_private or not ip_network(ip).is_global):
            return False

        return not cls.is_internal_address(ip)