Skip to content

Utilities - Request

Utilities - Request¤

get_random_user_agent(browsers=None) ¤

get_random_user_agent returns a random user agent.

We provide two predefined browsers, chrome and firefox.

Parameters:

Name Type Description Default
browsers list, optional

which browsers to use, defaults to ["chrome", "firefox"]

None

Returns:

Type Description
dict

dictionary for the requests module to consume, e.g. {'User-Agent': "blabla"}

Source code in sm_trendy/utilities/request.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def get_random_user_agent(browsers=None):
    """
    get_random_user_agent returns a random user agent.

    We provide two predefined browsers, chrome and firefox.

    Unknown browser names are reported through ``logger.error`` and then
    skipped. If no known browser remains (for example when an empty list
    is given), the user agent is drawn from all predefined browsers.

    :param browsers: which browsers to use, defaults to ["chrome", "firefox"];
        a single browser name may also be given as a plain string
    :type browsers: list, optional
    :return: dictionary for the requests module to consume as {'User-Agent': "blabla"}
    :rtype: dict
    """

    chrome_user_agents = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36",
        "Mozilla/5.0 (Windows NT 5.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
    ]
    # NOTE(review): every entry below carries an MSIE/Trident token, i.e. it
    # identifies Internet Explorer rather than Firefox. Left unchanged so the
    # headers sent stay the same; confirm whether real Firefox strings are wanted.
    firefox_user_agents = [
        "Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1)",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
        "Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (Windows NT 6.2; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)",
        "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)",
    ]

    user_agents_dict = {"chrome": chrome_user_agents, "firefox": firefox_user_agents}

    if browsers is None:
        browsers = list(user_agents_dict)
    elif isinstance(browsers, str):
        browsers = [browsers]
    else:
        # Materialise once so one-shot iterables survive the two passes below.
        browsers = list(browsers)

    # Report unknown browsers, then skip them instead of failing with a
    # KeyError on the dictionary lookup.
    unknown_browsers = set(browsers) - set(user_agents_dict)
    if unknown_browsers:
        logger.error(f"Unknown browser: {unknown_browsers}")

    known_browsers = [browser for browser in browsers if browser in user_agents_dict]
    if not known_browsers:
        # Nothing usable was requested (empty input or only unknown names):
        # fall back to every predefined browser so random.choice never sees
        # an empty list.
        known_browsers = list(user_agents_dict)

    user_agent_list = [
        user_agent
        for browser in known_browsers
        for user_agent in user_agents_dict[browser]
    ]

    return {"User-Agent": random.choice(user_agent_list)}

get_request_configs(headers=None, timeout=None, proxies=None, cookies=None) ¤

get_request_configs creates a config dictionary for a session to use. These are the keyword arguments of the session get or post methods.

Proxies can be set by providing a dictionary of the form

{
    'http': some super_proxy_url,
    'https': some super_proxy_url,
}

Parameters:

Name Type Description Default
headers

headers of the request, such as the user agent, defaults to a random user agent from get_random_user_agent

None
timeout

timeout strategy, defaults to (5, 14)

None
proxies

proxy configs, defaults to {}

None

Returns:

Type Description

dictionary of session configs for session methods, e.g., get, to use.

Source code in sm_trendy/utilities/request.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def get_request_configs(
    headers=None,
    timeout=None,
    proxies=None,
    cookies=None,
):
    """
    get_request_configs creates a config dictionary for a session to use. These are the keyword arguments of the session get or post methods.

    Proxies can be set by providing a dictionary of the form

    ```python
    {
        'http': some super_proxy_url,
        'https': some super_proxy_url,
    }
    ```

    :param headers: headers of the request, such as the user agent, defaults to a random user agent from get_random_user_agent
    :param timeout: timeout strategy, defaults to (5, 14)
    :param proxies: proxy configs, defaults to {}
    :param cookies: cookies to send with the request; the ``cookies`` key is
        only added to the result when a value is given, defaults to None
    :return: dictionary of session configs for session methods, e.g., get, to use.
    """

    if headers is None:
        headers = get_random_user_agent()

    if timeout is None:
        timeout = (5, 14)

    if proxies is None:
        proxies = {}

    configs = dict(headers=headers, proxies=proxies, timeout=timeout)

    # Forward cookies only when supplied, so callers that never pass them get
    # exactly the same dictionary as before.
    if cookies is not None:
        configs["cookies"] = cookies

    return configs

get_session(retry_params=None, session=None) ¤

get_session prepares a session object.

Parameters:

Name Type Description Default
retry_params dict, optional

the rules to retry, defaults to {"retries": 5, "backoff_factor": 0.3, "status_forcelist": (500, 502, 504)}

None
session [type], optional

an existing session to mount the retry adapter on; a new requests.Session is created when None, defaults to None

None
Source code in sm_trendy/utilities/request.py
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def get_session(
    retry_params=None,
    session=None,
):
    """
    get_session prepares a session object with a retrying adapter.

    A ``Retry`` policy is built from ``retry_params`` and mounted, through an
    ``HTTPAdapter``, on both the ``http://`` and ``https://`` prefixes of the
    session.

    :param retry_params: the rules to retry, defaults to {"retries": 5, "backoff_factor": 0.3, "status_forcelist": (500, 502, 504)};
        keys missing from a user-supplied dictionary fall back to these defaults
    :type retry_params: dict, optional
    :param session: an existing session to mount the retry adapter on; a new
        ``requests.Session`` is created when None, defaults to None
    :type session: requests.Session, optional
    :return: the session (the one passed in, or the newly created one) with
        the retry adapter mounted
    """

    default_retry_params = {
        "retries": 5,
        "backoff_factor": 0.3,
        "status_forcelist": (500, 502, 504),
    }
    # Merge caller values over the defaults so a partial dictionary such as
    # {"retries": 3} does not hand None to Retry for the keys it left out.
    params = {**default_retry_params, **(retry_params or {})}

    if session is None:
        session = requests.Session()

    retry = Retry(
        total=params["retries"],
        read=params["retries"],
        connect=params["retries"],
        backoff_factor=params["backoff_factor"],
        status_forcelist=params["status_forcelist"],
    )

    adapter = HTTPAdapter(max_retries=retry)
    session.mount("http://", adapter)
    session.mount("https://", adapter)

    return session