70 lines
2.2 KiB
Python
70 lines
2.2 KiB
Python
import socket
|
|
from typing import Union
|
|
from urllib.parse import urlparse
|
|
from ipaddress import ip_address, ip_network, IPv4Address
|
|
|
|
class UrlValidator:
    """Vet a URL before the application fetches it on a user's behalf.

    A URL is accepted only when it uses an allowed scheme and port, contains
    only whitelisted characters, is not a blacklisted host, and resolves to a
    publicly routable IPv4 address.  Every failure mode returns ``False``
    (fail closed).

    The ``DEFAULT_*`` class attributes hold the policy; a subclass may
    override them.  An empty scheme or port whitelist disables that
    particular check.

    NOTE(review): the hostname is resolved here purely for validation.  If
    the caller's HTTP client resolves the name again, it may receive a
    different answer (DNS rebinding) -- confirm how callers connect.
    NOTE(review): ``socket.gethostbyname`` yields a single IPv4 address, so
    additional A records and any AAAA records are not inspected.
    """

    # Ports a URL may name explicitly; a URL without a port is accepted.
    DEFAULT_PORT_WHITELIST = frozenset({80, 81, 8080, 443, 8443, 8000})
    DEFAULT_SCHEME_WHITELIST = frozenset({'http', 'https'})
    # Presumably cloud instance-metadata endpoints (the host names suggest
    # so) -- verify the IP entries against the providers' documentation.
    DEFAULT_HOST_BLACKLIST = frozenset({
        '192.0.0.192',
        '169.254.169.254',
        '100.100.100.200',
        'metadata.packet.net',
        'metadata.google.internal',
    })
    # Any character outside this set (e.g. '@', '%', '#', whitespace)
    # causes the URL to be rejected outright.
    DEFAULT_CHARACTER_WHITELIST = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:/-_.?&='

    @staticmethod
    def is_internal_address(ip: IPv4Address) -> bool:
        """Return True if *ip* belongs to any non-public address class.

        Only the ``is_*`` properties of *ip* are read: private, unspecified,
        reserved, loopback, multicast and link-local.
        """
        return (
            ip.is_private
            or ip.is_unspecified
            or ip.is_reserved
            or ip.is_loopback
            or ip.is_multicast
            or ip.is_link_local
        )

    @classmethod
    def validate(cls, url: str) -> bool:
        """Return True if *url* is considered safe to request.

        Args:
            url: The candidate URL.  ``None`` and any other non-string value
                are rejected.

        Returns:
            ``True`` when every check passes, ``False`` otherwise.  Parsing
            and name-resolution errors are reported as ``False`` rather than
            raised.
        """
        if not isinstance(url, str):
            return False

        allowed_chars = frozenset(cls.DEFAULT_CHARACTER_WHITELIST)
        if any(ch not in allowed_chars for ch in url):
            return False

        try:
            parsed = urlparse(url)
            # `.port` raises ValueError for a non-numeric or out-of-range port.
            scheme, host, port = parsed.scheme, parsed.hostname, parsed.port
        except ValueError:
            return False

        # The cheap syntactic checks run before any DNS lookup is made.
        scheme_whitelist = cls.DEFAULT_SCHEME_WHITELIST
        if scheme_whitelist and scheme not in scheme_whitelist:
            return False

        # Covers bare IPs and scheme-less strings, which have no network
        # location and therefore nothing to resolve.
        if not host:
            return False

        port_whitelist = cls.DEFAULT_PORT_WHITELIST
        if port_whitelist and port is not None and port not in port_whitelist:
            return False

        host_blacklist = cls.DEFAULT_HOST_BLACKLIST
        # `hostname` is already lower-cased by urlparse; drop the trailing
        # dot so the fully-qualified spelling "name." cannot dodge the list.
        if host.rstrip('.') in host_blacklist:
            return False

        try:
            ip = ip_address(socket.gethostbyname(host))
        except (OSError, ValueError):
            # OSError covers socket.gaierror/herror (resolution failure);
            # ValueError covers IDNA encoding errors and malformed results.
            return False

        # A different name or numeric spelling (e.g. a decimal integer host)
        # may resolve to a blacklisted address, so check the result as well.
        if str(ip) in host_blacklist:
            return False

        # CGNAT addresses (100.64.0.0/10) do not set `is_private`, so
        # `is_global` is consulted in addition.
        if ip.version == 4 and (ip.is_private or not ip.is_global):
            return False

        return not cls.is_internal_address(ip)