Coverage for odmpy/utils.py: 96.7%

3# This file is part of odmpy.

5# odmpy is free software: you can redistribute it and/or modify

6# it under the terms of the GNU General Public License as published by

7# the Free Software Foundation, either version 3 of the License, or

8# (at your option) any later version.

10# odmpy is distributed in the hope that it will be useful,

11# but WITHOUT ANY WARRANTY; without even the implied warranty of

12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

13# GNU General Public License for more details.

14#

15# You should have received a copy of the GNU General Public License

16# along with odmpy. If not, see <http://www.gnu.org/licenses/>.

17#

19import os

20import platform

21import re

22import unicodedata

23import xml.etree.ElementTree as ET

24from mimetypes import guess_type

25from pathlib import Path

26from typing import Optional

28from mutagen.mp3 import MP3 # type: ignore[import]

30#

31# Small utility type functions used across the board

32#

# Duration strings of the form "[H:]M:S[.fraction]"
TIMESTAMP_RE = re.compile(
    r"^((?P<hr>[0-9]+):)?"  # optional hours, followed by ":"
    r"(?P<min>[0-9]+):"  # minutes
    r"(?P<sec>[0-9]+)"  # seconds
    r"(\.(?P<ms>[0-9]+))?$"  # optional fractional part after "."
)

# Characters replaced in path components when running on Windows
ILLEGAL_WIN_PATH_CHARS_RE = re.compile(r'[<>:"/\\|?*]')

# Fallback mimetypes keyed by lowercase file extension (including the dot),
# consulted when the standard library cannot guess a type
MIMETYPE_MAP = {
    # documents and stylesheets
    ".xhtml": "application/xhtml+xml",
    ".html": "text/html",
    ".css": "text/css",
    # raster images
    ".png": "image/png",
    ".gif": "image/gif",
    ".jpeg": "image/jpeg",
    ".jpg": "image/jpeg",
    # fonts
    ".otf": "font/otf",
    ".ttf": "font/ttf",
    ".woff": "font/woff",
    ".woff2": "font/woff2",
    ".eot": "application/vnd.ms-fontobject",
    # vector images and navigation documents
    ".svg": "image/svg+xml",
    ".ncx": "application/x-dtbncx+xml",
}

def guess_mimetype(url: str) -> Optional[str]:
    """
    Work out the mimetype of a url from its file name.

    The standard library's (non-strict) guess is preferred; when it has
    no answer, the lowercase file extension is looked up in MIMETYPE_MAP.

    :param url: The url (or path) to inspect
    :return: The mimetype, or None if it cannot be determined
    """
    path = Path(url)
    guessed = guess_type(path.name, strict=False)[0]
    if guessed:
        return guessed
    # stdlib has no answer, fall back to our own extension table
    return MIMETYPE_MAP.get(path.suffix.lower())

def is_windows() -> bool:
    """
    Tells whether the current platform is Windows.

    :return: True if either `os.name` or `platform.system()` reports Windows
    """
    if os.name == "nt":
        return True
    return platform.system().lower() == "windows"

def plural_or_singular_noun(
    value: float, singular_noun: str, plural_noun: str = ""
) -> str:
    """
    Picks the singular or plural form of a noun to go with a quantity.

    :param value: The quantity being described
    :param singular_noun: The noun to use when value is exactly 1
    :param plural_noun: The noun to use otherwise. Defaults to the
        singular noun with an "s" appended
    :return: The noun matching the value
    """
    plural_form = plural_noun or singular_noun + "s"
    return singular_noun if value == 1 else plural_form

def sanitize_path(text: str, sub_text: str = "-", exclude_chars: str = "") -> str:
    """
    Cleans up a single local file path component.

    Path separators (and, on Windows, all characters illegal in file
    names) are replaced with sub_text, the excluded characters are
    replaced or removed, and non-printable characters are dropped.

    :param text: The path component to clean
    :param sub_text: The replacement for invalid characters
    :param exclude_chars: Additional characters to take out of the text
    :return: The cleaned text
    """
    excluded = exclude_chars or ""
    on_windows = os.name == "nt" or platform.system().lower() == "windows"
    if on_windows:
        # just replacing `os.sep` is not enough on Windows
        # ref https://github.com/ping/odmpy/issues/30
        text = ILLEGAL_WIN_PATH_CHARS_RE.sub(sub_text, text)

    # example, if "-" is in exclude_chars, we can't use "-" as the
    # replacement, so the excluded characters are simply removed
    if sub_text and sub_text not in excluded:
        replacement = sub_text
    else:
        replacement = ""
    for char in excluded:
        text = text.replace(char, replacement)

    cleaned = text.replace(os.sep, sub_text)
    # also strip away non-printable chars just to be safe
    return "".join(filter(str.isprintable, cleaned))

120

121

def get_element_text(ele: Optional[ET.Element]) -> str:
    """
    Gets the text of an element, tolerating a missing element

    :param ele: The element, or None
    :return: The element's text, or "" if there is no element or no text
    """
    if ele is None:
        return ""
    return ele.text or ""

132

133

def parse_duration_to_milliseconds(text: str) -> int:
    """
    Converts a duration string into milliseconds

    The part after the "." is treated as a decimal fraction of a second,
    so "10:15.3", "10:15.300" and "10:15.3000" all carry 300 milliseconds.
    Digits beyond millisecond precision are discarded.

    :param text: A duration string, e.g. "10:15", "10:15.300", "1:10:15"
    :return: The duration in milliseconds
    :raises ValueError: If the text is not a recognised duration string
    """
    mobj = TIMESTAMP_RE.match(text)
    if not mobj:
        raise ValueError(f"Invalid timestamp text: {text}")
    hours = int(mobj.group("hr") or 0)
    minutes = int(mobj.group("min") or 0)
    seconds = int(mobj.group("sec") or 0)
    # Pad short fractions on the right ("3" -> "300") and cut long ones down
    # to 3 digits ("1234" -> "123"); without the cut, a longer fraction would
    # be read as a raw millisecond count (e.g. ".1234" as 1234ms)
    fraction = (mobj.group("ms") or "0").ljust(3, "0")[:3]
    milliseconds = int(fraction)
    return hours * 60 * 60 * 1000 + minutes * 60 * 1000 + seconds * 1000 + milliseconds

149

150

def parse_duration_to_seconds(text: str) -> int:
    """
    Converts a duration string into a whole number of seconds

    The duration is parsed to milliseconds first, then rounded with the
    built-in `round()`.

    :param text: A duration string, e.g. "10:15", "10:15.300", "1:10:15"
    :return: The duration rounded to seconds
    """
    total_ms = parse_duration_to_milliseconds(text)
    return round(total_ms / 1000.0)

159

160

def mp3_duration_ms(filename: Path) -> int:
    """
    Returns the length of an mp3 file in milliseconds

    :param filename: Path to the mp3 file
    :return: The length in ms, rounded to the nearest integer
    :raises ValueError: If no audio info is available for the file
    """
    # Ref: https://github.com/ping/odmpy/pull/3
    # mutagen is used here because eyeD3's audio length function
    # (audiofile.info.time_secs) returns incorrect times due to its
    # header computation; mutagen does not have this issue
    info = MP3(filename).info
    if not info:
        raise ValueError(f"Unable to parse MP3 info from: {filename}")
    length_ms = info.length * 1000
    return int(round(length_ms))

173

174

175# From django

def slugify(value: str, allow_unicode: bool = False) -> str:
    """
    Turn arbitrary text into a slug.

    Unless 'allow_unicode' is True, the text is first reduced to ASCII.
    Characters other than alphanumerics, underscores, hyphens and
    whitespace are removed, leading and trailing whitespace is stripped,
    the text is lowercased, and each run of whitespace and/or hyphens is
    collapsed into a single hyphen.
    """
    if allow_unicode:
        normalized = unicodedata.normalize("NFKC", value)
        regex_flags = re.U
    else:
        # decompose accented characters, then drop whatever is not ASCII
        decomposed = unicodedata.normalize("NFKD", value)
        normalized = decomposed.encode("ascii", "ignore").decode("ascii")
        regex_flags = 0

    kept = re.sub(r"[^\w\s-]", "", normalized, flags=regex_flags)
    lowered = kept.strip().lower()
    return re.sub(r"[-\s]+", "-", lowered, flags=regex_flags)

190 return re.sub(r"[-\s]+", "-", value)