Skip to content

Utilities - Config

Utilities - Config¤

ConfigTable ¤

ConfigBundle to table

Parameters:

Name Type Description Default
config_bundle

serpapi config bundle

required
Source code in sm_trendy/utilities/config.py
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
class ConfigTable:
    """
    ConfigBundle to table

    The bundle must support ``len()``, iteration over its configs, and expose
    a dict-like ``global_config``.

    :param config_bundle: serpapi config bundle
    """

    def __init__(self, config_bundle):
        self.config_bundle = config_bundle

    def table(self, top_n: int = 10):
        """
        Build a table listing at most the first ``top_n`` configs.

        The title reports the total number of configs, the configured
        ``path.parent_folder`` (``None`` if absent) and ``top_n``.

        :param top_n: only display the top_n configs
        :return: the populated ``Table``
        """
        parent_folder = self.config_bundle.global_config.get("path", {}).get(
            "parent_folder"
        )
        n_configs = len(self.config_bundle)
        table = Table(
            title=(
                f"Configs: {n_configs};\n"
                f"Path: {str(parent_folder)};\n"
                f"Top N: {top_n}"
            ),
            show_lines=True,
        )

        table.add_column("q", justify="right", style="cyan", no_wrap=False)
        table.add_column("cat", style="magenta", no_wrap=False)
        table.add_column("geo", style="magenta", no_wrap=False)
        table.add_column("timeframe", style="magenta", no_wrap=False)

        for idx, c in enumerate(self.config_bundle):
            # idx is zero-based: once idx reaches top_n, exactly top_n rows
            # have been added (``idx > top_n`` would emit one row too many).
            if idx >= top_n:
                break
            table.add_row(
                c.serpapi_params.q,
                c.serpapi_params.cat,
                c.serpapi_params.geo,
                c.serpapi_params.date,
            )

        return table

table(top_n=10) ¤

Parameters:

Name Type Description Default
top_n int

only display the top_n configs

10
Source code in sm_trendy/utilities/config.py
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
def table(self, top_n: int = 10):
    """
    Build a table listing at most the first ``top_n`` configs.

    The title reports the total number of configs, the configured
    ``path.parent_folder`` (``None`` if absent) and ``top_n``.

    :param top_n: only display the top_n configs
    :return: the populated ``Table``
    """
    parent_folder = self.config_bundle.global_config.get("path", {}).get(
        "parent_folder"
    )
    n_configs = len(self.config_bundle)
    table = Table(
        title=(
            f"Configs: {n_configs};\n"
            f"Path: {str(parent_folder)};\n"
            f"Top N: {top_n}"
        ),
        show_lines=True,
    )

    table.add_column("q", justify="right", style="cyan", no_wrap=False)
    table.add_column("cat", style="magenta", no_wrap=False)
    table.add_column("geo", style="magenta", no_wrap=False)
    table.add_column("timeframe", style="magenta", no_wrap=False)

    for idx, c in enumerate(self.config_bundle):
        # idx is zero-based: once idx reaches top_n, exactly top_n rows
        # have been added (``idx > top_n`` would emit one row too many).
        if idx >= top_n:
            break
        table.add_row(
            c.serpapi_params.q,
            c.serpapi_params.cat,
            c.serpapi_params.geo,
            c.serpapi_params.date,
        )

    return table

PathParams ¤

Bases: BaseModel

Parameters used to build the folder path of the data files.

Note

We will have to keep the order of the parameters.

Pydantic already tries to provide an ordered schema. In addition, we added a property called path_schema.

Source code in sm_trendy/utilities/config.py
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
class PathParams(BaseModel):
    """Parameters used to build the folder path of the data files.

    !!! note

        We will have to keep the order of the parameters.

        Pydantic already tries to provide an ordered schema.
        In addition, we added a property called `path_schema`.
    """

    keyword: str
    cat: str
    geo: str
    timeframe: str

    @property
    def path_schema(self) -> OrderedDict:
        """Ordered dictionary for the path schema

        Note that every value (keyword, cat, geo and timeframe) is slugified.
        The dictionary is rebuilt on each access.
        """
        return OrderedDict(
            [
                ("keyword", slugify(self.keyword)),
                ("cat", slugify(self.cat)),
                ("geo", slugify(self.geo)),
                ("timeframe", slugify(self.timeframe)),
            ]
        )

    def path(self, parent_folder: AnyPath) -> AnyPath:
        """build the path under the parent folder

        One ``key=value`` folder level is appended per entry of
        `path_schema`, in schema order.

        :param parent_folder: base path
        :return: the partitioned folder path
        """
        if not isinstance(parent_folder, AnyPath):
            parent_folder = AnyPath(parent_folder)

        folder = parent_folder
        # Read the property once: every access re-slugifies all fields.
        for key, value in self.path_schema.items():
            folder = folder / f"{key}={value}"

        return folder

    def s3_access_point(
        self,
        base_url: str,
        snapshot_date: Union[datetime.date, Literal["latest"]] = "latest",
        format: Optional[Literal["json"]] = "json",
        filename: Optional[str] = "data.json",
    ) -> str:
        """
        An access point is some kind of URL. For example
        https://sm-google-trend-public.s3.eu-central-1.amazonaws.com/
        agg/keyword=curtain/cat=0/geo=de/timeframe=today-5-y/format=json/
        snapshot_date=latest/data.json

        :param base_url: base url to append the path to; a single trailing
            slash is removed before appending
        :param snapshot_date: snapshot of the file
        :param format: format of the file
        :param filename: name of the file
        :return: the URL as a string
        """
        if base_url.endswith("/"):
            base_url = base_url[:-1]

        # Read the property once: every access re-slugifies all fields.
        partitions = "".join(
            f"/{key}={value}" for key, value in self.path_schema.items()
        )

        return (
            f"{base_url}{partitions}"
            f"/format={format}/snapshot_date={snapshot_date}/{filename}"
        )

    def s3_path(
        self,
        parent_folder: AnyPath,
        snapshot_date: Union[datetime.date, Literal["latest"]] = "latest",
        format: Optional[Literal["json"]] = "json",
        filename: Optional[str] = "data.json",
    ) -> AnyPath:
        """build the path under the parent folder, for specific file name and format

        :param parent_folder: base path
        :param snapshot_date: which snapshot date to use
        :param format: which format to take
        :param filename: which file name to take
        :return: path of the file below the partitioned folder
        """
        # `path` converts parent_folder and appends the schema partitions;
        # only the format / snapshot / file levels are added here.
        return (
            self.path(parent_folder)
            / f"format={format}"
            / f"snapshot_date={snapshot_date}"
            / f"{filename}"
        )

path_schema: OrderedDict property ¤

Ordered dictionary for the path schema

Note that every value (keyword, cat, geo and timeframe) is slugified.

path(parent_folder) ¤

build the path under the parent folder

Parameters:

Name Type Description Default
parent_folder AnyPath

base path

required
Source code in sm_trendy/utilities/config.py
69
70
71
72
73
74
75
76
77
78
79
80
def path(self, parent_folder: AnyPath) -> AnyPath:
    """build the path under the parent folder

    One ``key=value`` folder level is appended per entry of
    ``self.path_schema``, in schema order.

    :param parent_folder: base path
    :return: the partitioned folder path
    """
    if not isinstance(parent_folder, AnyPath):
        parent_folder = AnyPath(parent_folder)

    folder = parent_folder
    # Read the schema once instead of looking it up again per key.
    for key, value in self.path_schema.items():
        folder = folder / f"{key}={value}"

    return folder

s3_access_point(base_url, snapshot_date='latest', format='json', filename='data.json') ¤

An access point is some kind of URL. For example: https://sm-google-trend-public.s3.eu-central-1.amazonaws.com/agg/keyword=curtain/cat=0/geo=de/timeframe=today-5-y/format=json/snapshot_date=latest/data.json

Parameters:

Name Type Description Default
base_url str

base url to append the path to

required
snapshot_date Union[datetime.date, Literal['latest']]

snapshot of the file

'latest'
filename Optional[str]

name of the file

'data.json'
Source code in sm_trendy/utilities/config.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
def s3_access_point(
    self,
    base_url: str,
    snapshot_date: Union[datetime.date, Literal["latest"]] = "latest",
    format: Optional[Literal["json"]] = "json",
    filename: Optional[str] = "data.json",
):
    """
    An access point is some kind of URL. For example
    https://sm-google-trend-public.s3.eu-central-1.amazonaws.com/
    agg/keyword=curtain/cat=0/geo=de/timeframe=today-5-y/format=json/
    snapshot_date=latest/data.json

    :param base_url: base url to append the path to; a single trailing
        slash is removed before appending
    :param snapshot_date: snapshot of the file
    :param format: format of the file
    :param filename: name of the file
    :return: the URL as a string
    """
    if base_url.endswith("/"):
        base_url = base_url[:-1]

    # Read the schema once instead of looking it up again per key.
    partitions = "".join(
        f"/{key}={value}" for key, value in self.path_schema.items()
    )

    return (
        f"{base_url}{partitions}"
        f"/format={format}/snapshot_date={snapshot_date}/{filename}"
    )

s3_path(parent_folder, snapshot_date='latest', format='json', filename='data.json') ¤

build the path under the parent folder, for specific file name and format

Parameters:

Name Type Description Default
parent_folder AnyPath

base path

required
snapshot_date Union[datetime.date, Literal['latest']]

which snapshot date to use

'latest'
format Optional[Literal['json']]

which format to take

'json'
filename Optional[str]

which file name to take

'data.json'
Source code in sm_trendy/utilities/config.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
def s3_path(
    self,
    parent_folder: AnyPath,
    snapshot_date: Union[datetime.date, Literal["latest"]] = "latest",
    format: Optional[Literal["json"]] = "json",
    filename: Optional[str] = "data.json",
) -> AnyPath:
    """build the path under the parent folder, for specific file name and format

    :param parent_folder: base path
    :param snapshot_date: which snapshot date to use
    :param format: which format to take
    :param filename: which file name to take
    :return: path of the file below the partitioned folder
    """
    # `self.path` converts parent_folder to AnyPath and appends the schema
    # partitions; only the format / snapshot / file levels are added here.
    return (
        self.path(parent_folder)
        / f"format={format}"
        / f"snapshot_date={snapshot_date}"
        / f"{filename}"
    )

convert_path(raw_config) ¤

Convert str representation of path to Path

Parameters:

Name Type Description Default
raw_config Dict

raw config

required
Source code in sm_trendy/utilities/config.py
14
15
16
17
18
19
20
21
22
23
24
25
26
def convert_path(raw_config: Dict) -> Dict:
    """Convert str representation of path to Path

    The input is deep-copied, so ``raw_config`` itself is never modified.
    Only the ``parent_folder`` key is converted; a missing or ``None``
    value is left as is, as is a value that is already an ``AnyPath``.

    :param raw_config: raw config
    :return: a copy of the config with ``parent_folder`` as an ``AnyPath``
    """
    keys = ["parent_folder"]
    config = copy.deepcopy(raw_config)

    for k in keys:
        value = config.get(k)
        # The original assigned config[k] back to itself, so the conversion
        # never happened; wrap the raw value in AnyPath instead.
        if value is not None and not isinstance(value, AnyPath):
            config[k] = AnyPath(value)

    return config