Coverage for odmpy/utils.py: 96.7%
61 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-14 08:51 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-14 08:51 +0000
1# Copyright (C) 2021 github.com/ping
2#
3# This file is part of odmpy.
4#
5# odmpy is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 3 of the License, or
8# (at your option) any later version.
9#
10# odmpy is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with odmpy. If not, see <http://www.gnu.org/licenses/>.
17#
19import os
20import platform
21import re
22import unicodedata
23import xml.etree.ElementTree as ET
24from mimetypes import guess_type
25from pathlib import Path
26from typing import Optional
28from mutagen.mp3 import MP3 # type: ignore[import]
30#
31# Small utility type functions used across the board
32#
# Duration strings of the form "[H:]M:S[.fraction]". Each numeric part is
# exposed as a named group: hr, min, sec and ms. The adjacent raw literals
# are concatenated at compile time into a single pattern.
TIMESTAMP_RE = re.compile(
    r"^((?P<hr>[0-9]+):)?"
    r"(?P<min>[0-9]+):"
    r"(?P<sec>[0-9]+)"
    r"(\.(?P<ms>[0-9]+))?$"
)

# Characters that sanitize_path() substitutes when running on Windows.
ILLEGAL_WIN_PATH_CHARS_RE = re.compile(r'[<>:"/\\|?*]')

# Fallback lookup (lower-cased file suffix -> mimetype) consulted by
# guess_mimetype() when the standard library cannot guess a type.
MIMETYPE_MAP = {
    # documents and stylesheets
    ".xhtml": "application/xhtml+xml",
    ".html": "text/html",
    ".css": "text/css",
    # raster images
    ".png": "image/png",
    ".gif": "image/gif",
    ".jpeg": "image/jpeg",
    ".jpg": "image/jpeg",
    # fonts
    ".otf": "font/otf",
    ".ttf": "font/ttf",
    ".woff": "font/woff",
    ".woff2": "font/woff2",
    ".eot": "application/vnd.ms-fontobject",
    # vector images and navigation files
    ".svg": "image/svg+xml",
    ".ncx": "application/x-dtbncx+xml",
}
def guess_mimetype(url: str) -> Optional[str]:
    """
    Work out the mimetype of the resource that a url refers to.

    The standard library's non-strict lookup is tried first, using only the
    final path component. When that yields nothing, ``MIMETYPE_MAP`` is
    consulted with the lower-cased file suffix.

    :param url: Url (or path) of the resource
    :return: The mimetype, or None when neither lookup recognises it
    """
    resource = Path(url)
    guessed = guess_type(resource.name, strict=False)[0]
    if guessed:
        return guessed
    # unknown to the standard library, use our own table
    return MIMETYPE_MAP.get(resource.suffix.lower())
def is_windows() -> bool:
    """
    Tells whether the current platform is Windows.

    :return: True if either ``os.name`` or ``platform.system()`` reports Windows
    """
    if os.name == "nt":
        return True
    return platform.system().lower() == "windows"
def plural_or_singular_noun(
    value: float, singular_noun: str, plural_noun: str = ""
) -> str:
    """
    Picks the singular or plural form of a noun to go with a quantity.

    :param value: The quantity being described
    :param singular_noun: Form returned when the value equals 1
    :param plural_noun: Form returned otherwise; when empty, the singular
        noun with an "s" appended is used
    :return: The noun form matching the value
    """
    if value == 1:
        return singular_noun
    return plural_noun or singular_noun + "s"
def sanitize_path(text: str, sub_text: str = "-", exclude_chars: str = "") -> str:
    """
    Strips invalid characters from a local file path component.

    Path separators, the characters listed in ``exclude_chars`` and, on
    Windows, the characters that are illegal in Windows paths are replaced
    by ``sub_text``. Non-printable characters are dropped.

    If ``sub_text`` is itself excluded (it occurs in ``exclude_chars``), the
    offending characters are removed instead of replaced, so the result
    never contains the replacement the caller asked to exclude. This applies
    to path separators as well.

    :param text: The path component to clean up
    :param sub_text: Replacement for each invalid character
    :param exclude_chars: Additional characters to strip from the text
    :return: The sanitized text
    """
    exclude_chars = exclude_chars or ""
    # example, if "-" is in exclude_chars, we can't use "-" as replacement,
    # so we'll just remove the character instead
    replacement = sub_text if sub_text and sub_text not in exclude_chars else ""
    if os.name == "nt" or platform.system().lower() == "windows":
        # just replacing `os.sep` is not enough on Windows
        # ref https://github.com/ping/odmpy/issues/30
        text = ILLEGAL_WIN_PATH_CHARS_RE.sub(replacement, text)
    for c in exclude_chars:
        text = text.replace(c, replacement)

    # use the same replacement for the separator so that an excluded
    # sub_text is not re-introduced after the exclusions were applied
    text = text.replace(os.sep, replacement)
    # also strip away non-printable chars just to be safe
    return "".join(c for c in text if c.isprintable())
def get_element_text(ele: Optional[ET.Element]) -> str:
    """
    Fetches the text of an xml element, never returning None.

    :param ele: The element, or None when it is missing
    :return: The element's text, or an empty string if there is no element
        or it has no text
    """
    # compare against None explicitly: an element's truth value is not a
    # reliable presence check
    if ele is None:
        return ""
    return ele.text or ""
def parse_duration_to_milliseconds(text: str) -> int:
    """
    Converts a duration string into milliseconds

    The fractional part of the seconds is read at millisecond precision:
    it is right-padded to three digits ("10:15.3" is 300 ms) and any digits
    beyond the third are discarded ("10:15.3005" is also 300 ms).

    :param text: A duration string, e.g. "10:15", "10:15.300", "1:10:15"
    :return: The duration in milliseconds
    :raises ValueError: If the text is not a valid duration string
    """
    mobj = TIMESTAMP_RE.match(text)
    if not mobj:
        raise ValueError(f"Invalid timestamp text: {text}")
    hours = int(mobj.group("hr") or 0)
    minutes = int(mobj.group("min") or 0)
    seconds = int(mobj.group("sec") or 0)
    fraction = mobj.group("ms") or ""
    # keep exactly three digits: padding alone would turn a longer fraction
    # such as "3005" into 3005 ms instead of 300 ms
    milliseconds = int(fraction[:3].ljust(3, "0"))
    return hours * 60 * 60 * 1000 + minutes * 60 * 1000 + seconds * 1000 + milliseconds
def parse_duration_to_seconds(text: str) -> int:
    """
    Converts a duration string into whole seconds

    The text is first converted to milliseconds and the result is then
    rounded with the built-in ``round()``.

    :param text: A duration string, e.g. "10:15", "10:15.300", "1:10:15"
    :return: The duration in seconds
    """
    total_ms = parse_duration_to_milliseconds(text)
    return round(total_ms / 1000.0)
def mp3_duration_ms(filename: Path) -> int:
    """
    Returns the length of the mp3 in ms

    mutagen is used here instead of eyeD3 because eyeD3's audio length
    function, ``audiofile.info.time_secs``, returns incorrect times due to
    its header computation. mutagen does not have this issue.
    Ref: https://github.com/ping/odmpy/pull/3

    :param filename: Path of the mp3 file
    :return: The length in milliseconds, rounded to an integer
    :raises ValueError: If no audio info is available for the file
    """
    mp3_file = MP3(filename)
    if mp3_file.info:
        length_ms = mp3_file.info.length * 1000
        return int(round(length_ms))
    raise ValueError(f"Unable to parse MP3 info from: {filename}")
175# From django
def slugify(value: str, allow_unicode: bool = False) -> str:
    """
    Turn arbitrary text into a slug.

    Unless 'allow_unicode' is True, the text is reduced to ASCII first.
    Anything that is not an alphanumeric, underscore, hyphen or whitespace
    is removed, surrounding whitespace is stripped and the text is
    lower-cased. Finally every run of whitespace and hyphens is collapsed
    into a single hyphen.
    """
    if allow_unicode:
        normalized = unicodedata.normalize("NFKC", value)
        regex_flags = re.U
    else:
        # decompose accented characters, then drop whatever is not ASCII
        decomposed = unicodedata.normalize("NFKD", value)
        normalized = decomposed.encode("ascii", "ignore").decode("ascii")
        regex_flags = 0
    kept = re.sub(r"[^\w\s-]", "", normalized, flags=regex_flags)
    trimmed = kept.strip().lower()
    return re.sub(r"[-\s]+", "-", trimmed, flags=regex_flags)