"""URL validation helpers intended to reject server-side request forgery targets.

Originally added as ``dependencies/validate_url.py`` by the patch titled
"domain validation".
"""
import socket
from typing import Union
from urllib.parse import urlparse
from ipaddress import ip_address, ip_network, IPv4Address, IPv6Address


class UrlValidator:
    """Decide whether a user-supplied URL is safe to fetch from the server side.

    A URL is accepted only when it uses an allowed scheme and port, contains
    only allowed characters, is not a known cloud-metadata host, and its host
    resolves to a globally routable address.
    """

    # Ports a URL may name explicitly; a URL without an explicit port is allowed.
    DEFAULT_PORT_WHITELIST = frozenset({80, 81, 8080, 443, 8443, 8000})
    DEFAULT_SCHEME_WHITELIST = frozenset({'http', 'https'})
    # Hosts rejected by name, before any DNS lookup is attempted.
    DEFAULT_HOST_BLACKLIST = frozenset({
        '192.0.0.192',
        '169.254.169.254',
        '100.100.100.200',
        'metadata.packet.net',
        'metadata.google.internal',
    })
    # Any character outside this set (e.g. '@', '#', '%', '[', whitespace)
    # causes the URL to be rejected outright.
    DEFAULT_CHARACTER_WHITELIST = frozenset(
        'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:/-_.?&='
    )

    @staticmethod
    def is_internal_address(ip: Union[IPv4Address, IPv6Address]) -> bool:
        """Return True if *ip* is private, unspecified, reserved, loopback,
        multicast or link-local."""
        return (
            ip.is_private
            or ip.is_unspecified
            or ip.is_reserved
            or ip.is_loopback
            or ip.is_multicast
            or ip.is_link_local
        )

    @classmethod
    def validate(cls, url: str) -> bool:
        """Return True if *url* passes every check, False otherwise.

        Checks run cheapest-first so that no DNS lookup is made for a URL
        that is going to be rejected anyway:

        1. *url* is a ``str`` made only of whitelisted characters.
        2. The scheme is ``http`` or ``https`` (so bare hosts/IPs are rejected).
        3. A host is present and is not blacklisted.
        4. An explicit port, if any, is whitelisted.
        5. The host resolves (via ``socket.gethostbyname``) to an address that
           is neither internal nor non-global.

        NOTE(review): only the single IPv4 address returned by
        ``gethostbyname`` is inspected, and the caller will resolve the name
        again when it actually fetches the URL, so this check alone does not
        defend against DNS rebinding.
        """
        if not isinstance(url, str):
            return False

        if any(ch not in cls.DEFAULT_CHARACTER_WHITELIST for ch in url):
            return False

        try:
            parsed = urlparse(url)
            scheme, host, port = parsed.scheme, parsed.hostname, parsed.port
        except ValueError:
            # urlparse / .port raise ValueError for malformed netlocs and for
            # ports that are non-numeric or out of range.
            return False

        if scheme not in cls.DEFAULT_SCHEME_WHITELIST:
            return False

        if not host:
            return False

        # A trailing dot denotes the same fully-qualified name; strip it so
        # "metadata.google.internal." cannot sidestep the blacklist.
        if host.rstrip('.') in cls.DEFAULT_HOST_BLACKLIST:
            return False

        if port is not None and port not in cls.DEFAULT_PORT_WHITELIST:
            return False

        try:
            ip = ip_address(socket.gethostbyname(host))
        except (OSError, ValueError):
            # OSError covers socket.gaierror/herror (resolution failure);
            # ValueError covers UnicodeError from IDNA encoding of bad labels.
            return False

        # CGNAT (100.64.0.0/10) addresses do not set `is_private`, so the
        # `is_global` test is required in addition to the internal checks.
        if cls.is_internal_address(ip) or not ip_network(ip).is_global:
            return False

        return True