from multipledispatch import dispatch
from typing import List, Tuple, Iterable, Union, Optional
from datetime import datetime, timedelta
from abc import ABCMeta, abstractmethod
import logging
import re
OPTION_VAR_INDEX: str = "index"
"""Name of the index option; it is written as ``{index}`` inside a URL."""

OPTION_VAR_DATE: str = "date"
"""Name of the date option; it is written as ``{date}`` inside a URL."""

OPTION_VAR_DATETIME: str = "datetime"
"""Name of the datetime option; it is written as ``{datetime}`` inside a URL."""

OPTION_VAR_ITERATOR: str = "iterator"
"""Name of the iterator option; it is written as ``{iterator}`` inside a URL."""
def get_option() -> Tuple:
    """
    Return every option name that may be written as a placeholder in a URL.

    :return: A tuple with the index, date, datetime and iterator option names, in that order.
    """
    all_options = (
        OPTION_VAR_INDEX,
        OPTION_VAR_DATE,
        OPTION_VAR_DATETIME,
        OPTION_VAR_ITERATOR,
    )
    return all_options
def set_index_rule() -> str:
    """
    Return the name of the index option.

    With the index option, one URL is generated per index value (1, 2, 3, ...).

    :return: The option name used by the index rule.
    """
    index_rule: str = OPTION_VAR_INDEX
    return index_rule
def set_date_rule() -> str:
    """
    Return the name of the date option.

    A date value generally looks like yyyymmdd: it only carries year, month and day.
    With the date option, one URL is generated per date (for example 20210101, 20210102, ...).

    :return: The option name used by the date rule.
    """
    date_rule: str = OPTION_VAR_DATE
    return date_rule
def set_datetime_rule() -> str:
    """
    Return the name of the datetime option.

    A datetime value generally looks like yyyymmddhhMMss: it carries year, month, day,
    hour, minute and second. With the datetime option, one URL is generated per datetime
    (for example 20210101000000, 20210101000001, ...).

    :return: The option name used by the datetime rule.
    """
    datetime_rule: str = OPTION_VAR_DATETIME
    return datetime_rule
def set_iterator_rule() -> str:
    """
    Return the name of the iterator option.

    With the iterator option, one URL is generated per element of a target iterator object.

    :return: The option name used by the iterator rule.
    """
    iterator_rule: str = OPTION_VAR_ITERATOR
    return iterator_rule
class BaseURL(metaclass=ABCMeta):
    """
    Abstract interface of a URL generator: it exposes a URL template (*base_url*)
    and a *generate* operation that expands it into concrete URLs.
    """

    @property
    @abstractmethod
    def base_url(self) -> str:
        """
        The URL template. It may contain one of the option placeholders (OPTION_VAR_INDEX,
        OPTION_VAR_DATE, OPTION_VAR_DATETIME or OPTION_VAR_ITERATOR); generated URLs carry
        the value that option stands for. For example, with *base_url* set to
        'https://www.google.com?date={date}', a generated URL would look like
        'https://www.google.com?date=20220601'.

        :return: The URL string value.
        """
        ...

    @abstractmethod
    def generate(self) -> List[str]:
        """
        Generate all the needed URLs based on the options.

        :return: A collection of URLs.
        """
        ...
[docs]class URL(BaseURL):
__Base_Url: str = None
__URLs: List[str] = []
def __init__(self, base: str, start: Optional[Union[int, str]] = None, end: Optional[Union[int, str]] = None,
             formatter: str = "yyyymmdd", iter: Optional[Iterable] = None):
    """
    Build a URL generator from a URL template and the values that drive its option.

    :param base: The URL template. It may contain one option placeholder such as
        ``{index}``, ``{date}``, ``{datetime}`` or ``{iterator}``.
    :param start: First value of the range (inclusive) used by the index, date and datetime rules.
    :param end: Last value of the range (inclusive) used by the index, date and datetime rules.
    :param formatter: Format of *start* and *end* for the date and datetime rules.
    :param iter: The values used by the iterator rule.
    :raises ValueError: If *base* is an empty string or None.
    """
    if base == "" or base is None:
        raise ValueError("Foundational URL cannot be empty.")
    self.__Base_Url: str = base

    # Detect which option placeholder the template contains. Each attribute holds the
    # regex match object, or None when that placeholder is absent.
    # Raw strings are required here: "\}" is an invalid escape sequence in a normal literal.
    self.option_is_index = re.search(r"\{" + re.escape(OPTION_VAR_INDEX) + r"\}", base)
    self.option_is_date = re.search(r"\{" + re.escape(OPTION_VAR_DATE) + r"\}", base)
    self.option_is_datetime = re.search(r"\{" + re.escape(OPTION_VAR_DATETIME) + r"\}", base)
    self.option_is_iterator = re.search(r"\{" + re.escape(OPTION_VAR_ITERATOR) + r"\}", base)

    self.start: Optional[Union[int, str]] = start
    self.end: Optional[Union[int, str]] = end
    self.formatter: str = formatter
    self.iterator: Optional[Iterable] = iter

    # Parsed range bounds; filled in by generate() for the date and datetime rules.
    self._start_date = None
    self._end_date = None
    self._diff_days = None

    # Step between two generated values; one day unless set_period() changes it.
    self._period_days = 1
    self._period_hours = 0
    self._period_minutes = 0
    self._period_seconds = 0

    self._option: Optional[str] = None
    # Result buffer that the handling methods append to.
    self.urls: List = []
@property
def base_url(self) -> str:
return self.__Base_Url
@base_url.setter
def base_url(self, url: str) -> None:
    """
    Replace the URL template and re-detect which option placeholder it contains.

    :param url: The new URL template.
    :raises ValueError: If *url* is an empty string or None.
    :return: None
    """
    if url == "" or url is None:
        raise ValueError("Foundational URL cannot be empty.")
    self.__Base_Url = url
    # Keep the placeholder detection in sync with the new template; otherwise the
    # is_*_rule() checks and generate() would still act on the previous URL's option.
    self.option_is_index = re.search(r"\{" + re.escape(OPTION_VAR_INDEX) + r"\}", url)
    self.option_is_date = re.search(r"\{" + re.escape(OPTION_VAR_DATE) + r"\}", url)
    self.option_is_datetime = re.search(r"\{" + re.escape(OPTION_VAR_DATETIME) + r"\}", url)
    self.option_is_iterator = re.search(r"\{" + re.escape(OPTION_VAR_ITERATOR) + r"\}", url)
[docs] def is_index_rule(self) -> bool:
"""
Check the option setting of current URL object is index type.
:return: It returns True if it is, or it returns False.
"""
return self.option_is_index is not None
[docs] def is_date_rule(self) -> bool:
"""
Check the option setting of current URL object is date type.
:return: It returns True if it is, or it returns False.
"""
return self.option_is_date is not None
[docs] def is_datetime_rule(self) -> bool:
"""
Check the option setting of current URL object is datetime type.
:return: It returns True if it is, or it returns False.
"""
return self.option_is_datetime is not None
[docs] def is_iterator_rule(self) -> bool:
"""
Check the option setting of current URL object is iterator type.
:return: It returns True if it is, or it returns False.
"""
return self.option_is_iterator is not None
[docs] def is_valid(self) -> bool:
"""
Check the option setting of current URL object is valid.
:return: It returns True if it is, or it returns False.
"""
return self.is_index_rule() or \
self.is_date_rule() or \
self.is_datetime_rule() or \
self.is_iterator_rule()
@property
def period_days(self) -> int:
"""
Get the day value of period.
:return: Return day value and it's a int type data.
"""
return self._period_days
@property
def period_hours(self) -> int:
"""
Get the hour value of period.
:return: Return hour value and it's a int type data.
"""
return self._period_hours
@property
def period_minutes(self) -> int:
"""
Get the minute value of period.
:return: Return minute value and it's a int type data.
"""
return self._period_minutes
@property
def period_seconds(self) -> int:
"""
Get the second value of period.
:return: Return second value and it's a int type data.
"""
return self._period_seconds
[docs] def set_period(self, days: Optional[int] = None, hours: Optional[int] = None, minutes: Optional[int] = None, seconds: Optional[int] = None) -> None:
"""
Configure the period settings like how many days, hours, minutes or seconds.
:param days: How many days to iterate next value.
:param hours: How many hours to iterate next value.
:param minutes: How many minutes to iterate next value.
:param seconds: How many seconds to iterate next value.
:return: None
"""
if days is not None:
self._period_days = days
if hours is not None:
self._period_hours = hours
if minutes is not None:
self._period_minutes = minutes
if seconds is not None:
self._period_seconds = seconds
def generate(self) -> List[str]:
    """
    Generate all URLs from *base_url* according to the option placeholder it contains.

    The placeholders are checked in the order index, date, datetime, iterator and the
    first one found decides the rule:

    * index: *start* and *end* are converted to int when needed, one URL per index.
    * date / datetime: *start* and *end* are strings parsed with *formatter*, one URL
      per period step.
    * iterator: one URL per element of the iterator given at construction.

    Every call starts from an empty result list, so calling it again does not
    duplicate previously generated URLs.

    :return: The list of generated URLs (also stored in *self.urls*).
    :raises ValueError: If a required setting is missing or has the wrong type,
        or if *base_url* contains no known option placeholder.
    """
    # Fresh list on purpose: a list returned by an earlier call is left untouched.
    self.urls = []

    if self.option_is_index:
        # Both bounds are required; a single missing one cannot define a range.
        if self.start is None or self.end is None:
            raise ValueError("Options *start* and *end* cannot be empty value with INDEX rule.")
        if type(self.start) is not int or type(self.end) is not int:
            logging.warning("The types of start index and end index aren't 'int'. It will try to convert to 'int' type.")
            try:
                # Convert both before storing so a failure leaves the settings unchanged.
                start_index, end_index = int(self.start), int(self.end)
            except (TypeError, ValueError) as e:
                raise ValueError("Parameter *start* and *end* should be integers or integer type characters.") from e
            self.start, self.end = start_index, end_index
        self._index_handling(index=self.start)
    elif self.option_is_date or self.option_is_datetime:
        # The date rule takes precedence when both placeholders are present.
        rule_name = "DATE" if self.option_is_date else "DATETIME"
        if self.start is None or self.end is None:
            raise ValueError(f"Options *start* and *end* cannot be empty value with {rule_name} rule.")
        if not isinstance(self.start, str) or not isinstance(self.end, str):
            raise ValueError("The value format is incorrect of options *start* and *end*.")

        # Use the formatter as-is when it already is a Python datetime format,
        # otherwise convert it first.
        if URL._is_py_datetime_format(formatter=self.formatter) is True:
            formatter = self.formatter
        else:
            formatter = URL._convert_formatter(formatter=self.formatter)
        self._start_date = datetime.strptime(self.start, formatter)
        self._end_date = datetime.strptime(self.end, formatter)

        if self.option_is_date:
            self._diff_days = (self._end_date.date() - self._start_date.date()).days
            self._date_handling(_date=self._start_date, days=self.period_days)
        else:
            self._diff_days = (self._end_date - self._start_date).days
            self._datetime_handling(
                _datetime=self._start_date,
                days=self.period_days,
                hours=self.period_hours,
                minutes=self.period_minutes,
                seconds=self.period_seconds)
    elif self.option_is_iterator:
        if self.iterator is None:
            raise ValueError("Options *iterator* cannot be empty value with ITERATOR rule.")
        self._iterator_handling(self.iterator)
    else:
        # Interpolate here: exceptions do not apply %-formatting to extra arguments.
        raise ValueError(
            "Cannot verify the option variable. Please using '{%s}', '{%s}', '{%s}' or '{%s}'."
            % (OPTION_VAR_INDEX, OPTION_VAR_DATE, OPTION_VAR_DATETIME, OPTION_VAR_ITERATOR))
    return self.urls
def _index_handling(self, index: int) -> None:
    """
    The main process to generate URLs with index.

    Appends one URL to *self.urls* for every index from *index* up to and
    including *self.end*, replacing the index placeholder with the index value.

    :param index: The index value to start from.
    :return: None
    """
    option = URL._add_flag(option=OPTION_VAR_INDEX)
    # Loop instead of recursing once per index: a large range would otherwise
    # exceed the interpreter's recursion limit.
    current = index
    while current <= self.end:
        self.urls.append(self.base_url.replace(option, str(current)))
        current += 1
def _date_handling(self, _date: datetime, days: int) -> None:
    """
    The main process to generate URLs with date.

    Appends one URL to *self.urls* for every date from *_date* up to and including
    *self._end_date*, advancing by *days* each time. The date placeholder is replaced
    with the date formatted as ``%Y%m%d``.

    :param _date: A datetime object to start from.
    :param days: How many days to advance to reach the next value.
    :raises ValueError: If there is at least one value to generate and *days* is not positive.
    :return: None
    """
    if _date > self._end_date:
        return

    step = timedelta(days=days)
    # A zero or negative step would never get past the end date.
    if step <= timedelta(0):
        raise ValueError("The period of DATE rule should be a positive number of days.")

    option = URL._add_flag(option=OPTION_VAR_DATE)
    # Loop instead of recursing once per date: a long range would otherwise
    # exceed the interpreter's recursion limit.
    current = _date
    while current <= self._end_date:
        self.urls.append(self.base_url.replace(option, current.strftime("%Y%m%d")))
        current = current + step
def _datetime_handling(self, _datetime: datetime, days: int = None, hours: int = None, minutes: int = None, seconds: int = None) -> None:
    """
    The main process to generate URLs with datetime.

    Appends one URL to *self.urls* for every datetime from *_datetime* up to and
    including *self._end_date*, advancing by the given period each time. The datetime
    placeholder is replaced with the ISO format of the value without its '-', ':'
    and 'T' characters.

    :param _datetime: A datetime object to start from.
    :param days: How many days to advance to reach the next value; None counts as 0.
    :param hours: How many hours to advance to reach the next value; None counts as 0.
    :param minutes: How many minutes to advance to reach the next value; None counts as 0.
    :param seconds: How many seconds to advance to reach the next value; None counts as 0.
    :raises ValueError: If there is at least one value to generate and the period is not positive.
    :return: None
    """
    if _datetime > self._end_date:
        return

    # timedelta() rejects None, so the declared defaults are mapped to zero.
    step = timedelta(days=days or 0, hours=hours or 0, minutes=minutes or 0, seconds=seconds or 0)
    # A zero or negative step would never get past the end datetime.
    if step <= timedelta(0):
        raise ValueError("The period of DATETIME rule should be a positive duration.")

    option = URL._add_flag(option=OPTION_VAR_DATETIME)
    # Deletes the ISO separators in one pass.
    separators = str.maketrans("", "", "-:T")
    # Loop instead of recursing once per value: a long range would otherwise
    # exceed the interpreter's recursion limit.
    current = _datetime
    while current <= self._end_date:
        date_option_val = current.isoformat().translate(separators)
        self.urls.append(self.base_url.replace(option, date_option_val))
        current = current + step
@dispatch((list, tuple, set))
def _iterator_handling(self, iter: Union[list, tuple, set]) -> None:
    """
    The main process to generate URLs with iterator. This overload handles an
    option *iter* of *list*, *tuple* or *set* type.

    Appends one URL to *self.urls* per element, replacing the iterator placeholder
    with the string form of the element.

    :param iter: The elements to iterate over. It could be a *list*, *tuple* or *set*.
    :return: None
    """
    placeholder = URL._add_flag(option=OPTION_VAR_ITERATOR)
    self.urls.extend(
        self.base_url.replace(placeholder, str(element)) for element in iter
    )
@dispatch(dict)
def _iterator_handling(self, iter: dict) -> None:
    """
    Description:
        The main process to generate URLs with iterator. This overload handles an
        option *iter* of *dict* type.
        Each key and value of the received parameter is combined in the HTTP GET
        option format (``key=value``), and one URL is appended to *self.urls* per
        pair with the iterator placeholder replaced by that text.
    Example:
        Received parameter value:
            {"index_1": 1, "index_2": 2}
        Values put in place of the placeholder, one URL each:
            index_1=1, index_2=2

    :param iter: The key/value pairs to iterate over. It should be a *dict*.
    :return: None
    """
    placeholder = URL._add_flag(option=OPTION_VAR_ITERATOR)
    self.urls.extend(
        self.base_url.replace(placeholder, f"{key}={val}") for key, val in iter.items()
    )
[docs] @staticmethod
def _add_flag(option: str) -> str:
"""
Get the character with the option and the specific format it defines.
:param option: The option setting.
:return: A string value.
"""
return "{" + option + "}"