Coverage for odmpy/utils.py: 96.7%

61 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-14 08:51 +0000

1# Copyright (C) 2021 github.com/ping 

2# 

3# This file is part of odmpy. 

4# 

5# odmpy is free software: you can redistribute it and/or modify 

6# it under the terms of the GNU General Public License as published by 

7# the Free Software Foundation, either version 3 of the License, or 

8# (at your option) any later version. 

9# 

10# odmpy is distributed in the hope that it will be useful, 

11# but WITHOUT ANY WARRANTY; without even the implied warranty of 

12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

13# GNU General Public License for more details. 

14# 

15# You should have received a copy of the GNU General Public License 

16# along with odmpy. If not, see <http://www.gnu.org/licenses/>. 

17# 

18 

19import os 

20import platform 

21import re 

22import unicodedata 

23import xml.etree.ElementTree as ET 

24from mimetypes import guess_type 

25from pathlib import Path 

26from typing import Optional 

27 

28from mutagen.mp3 import MP3 # type: ignore[import] 

29 

30# 

31# Small utility type functions used across the board 

32# 

33 

# Duration strings of the form "[H:]M:S[.fraction]".
# Named groups: "hr" (optional), "min", "sec" and "ms" (optional fraction digits).
TIMESTAMP_RE = re.compile(
    r"^((?P<hr>[0-9]+):)?(?P<min>[0-9]+):(?P<sec>[0-9]+)(\.(?P<ms>[0-9]+))?$"
)

# Characters substituted out of path components when running on Windows.
ILLEGAL_WIN_PATH_CHARS_RE = re.compile(r'[<>:"/\\|?*]')

# Fallback lookup of lowercase file extension -> mimetype, consulted by
# guess_mimetype() when the standard `mimetypes` registry has no answer.
MIMETYPE_MAP = {
    # documents and stylesheets
    ".xhtml": "application/xhtml+xml",
    ".html": "text/html",
    ".css": "text/css",
    # raster images
    ".png": "image/png",
    ".gif": "image/gif",
    ".jpeg": "image/jpeg",
    ".jpg": "image/jpeg",
    # fonts
    ".otf": "font/otf",
    ".ttf": "font/ttf",
    ".woff": "font/woff",
    ".woff2": "font/woff2",
    ".eot": "application/vnd.ms-fontobject",
    # vector images and navigation documents
    ".svg": "image/svg+xml",
    ".ncx": "application/x-dtbncx+xml",
}

54 

55 

def guess_mimetype(url: str) -> Optional[str]:
    """
    Attempt to guess the mimetype for a given url

    The last path component is first checked against the standard
    ``mimetypes`` registry (non-strict mode). If that gives no answer, the
    lowercased file extension is looked up in ``MIMETYPE_MAP``.

    :param url: A url or path whose last component is a file name
    :return: The guessed mimetype, or None if neither lookup knows it
    """
    target = Path(url)
    guessed = guess_type(target.name, strict=False)[0]
    if guessed:
        return guessed
    # the stdlib registry had nothing; fall back to our own extension table
    return MIMETYPE_MAP.get(target.suffix.lower())

68 

69 

def is_windows() -> bool:
    """
    Returns True if running on Windows.

    :return: True when ``os.name`` is "nt" or ``platform.system()`` reports
        "windows" (compared case-insensitively), otherwise False
    """
    if os.name == "nt":
        return True
    return platform.system().lower() == "windows"

77 

78 

def plural_or_singular_noun(
    value: float, singular_noun: str, plural_noun: str = ""
) -> str:
    """
    Returns the appropriate noun based on the value provided.

    :param value: The quantity being described
    :param singular_noun: Noun returned when value equals 1
    :param plural_noun: Noun returned for any other value. If empty, the
        plural is formed by appending "s" to singular_noun
    :return: singular_noun or the plural form
    """
    if value == 1:
        return singular_noun
    # every other quantity (0, fractions, negatives included) takes the plural
    return plural_noun or singular_noun + "s"

93 

94 

def sanitize_path(text: str, sub_text: str = "-", exclude_chars: str = "") -> str:
    """
    Strips invalid characters from a local file path component.

    :param text: The path component to clean up
    :param sub_text: Replacement for each invalid character
    :param exclude_chars: Additional characters to replace. If sub_text is
        empty or itself appears in exclude_chars, these characters are
        removed instead of being substituted
    :return: The cleaned text with path separators replaced and
        non-printable characters removed
    """
    on_windows = os.name == "nt" or platform.system().lower() == "windows"
    if on_windows:
        # just replacing `os.sep` is not enough on Windows
        # ref https://github.com/ping/odmpy/issues/30
        text = ILLEGAL_WIN_PATH_CHARS_RE.sub(sub_text, text)
        # NOTE(review): the exclude_chars handling is kept inside the Windows
        # branch; the original indentation was not recoverable - confirm.
        if exclude_chars:
            # example, if "-" is in exclude_chars, we can't use "-" as the
            # replacement, so we'll just remove the character
            if sub_text and sub_text not in exclude_chars:
                replacement = sub_text
            else:
                replacement = ""
            for unwanted in exclude_chars:
                text = text.replace(unwanted, replacement)

    text = text.replace(os.sep, sub_text)
    # also strip away non-printable chars just to be safe
    return "".join(filter(str.isprintable, text))

120 

121 

def get_element_text(ele: Optional[ET.Element]) -> str:
    """
    Returns the element text

    :param ele: An XML element, or None
    :return: The element's text, or an empty string when the element is
        None or has no text
    """
    if ele is None:
        return ""
    return ele.text or ""

132 

133 

def parse_duration_to_milliseconds(text: str) -> int:
    """
    Converts a duration string into milliseconds

    The fractional part, if present, is interpreted as a decimal fraction of
    a second: it is right-padded with zeros when shorter than 3 digits
    ("10:15.3" -> 300 ms) and truncated when longer ("10:15.3005" -> 300 ms),
    so extra digits are never misread as whole milliseconds.

    :param text: A duration string, e.g. "10:15", "10:15.300", "1:10:15"
    :return: The duration in milliseconds
    :raises ValueError: If text does not match TIMESTAMP_RE
    """
    match = TIMESTAMP_RE.match(text)
    if not match:
        raise ValueError(f"Invalid timestamp text: {text}")

    hours = int(match.group("hr") or 0)
    minutes = int(match.group("min") or 0)
    seconds = int(match.group("sec") or 0)
    # normalise the fraction to exactly 3 digits (millisecond precision)
    fraction = match.group("ms") or ""
    milliseconds = int(fraction[:3].ljust(3, "0"))

    return ((hours * 60 + minutes) * 60 + seconds) * 1000 + milliseconds

149 

150 

def parse_duration_to_seconds(text: str) -> int:
    """
    Converts a duration string into seconds

    The string is first converted with parse_duration_to_milliseconds() and
    the result is rounded to whole seconds using the built-in round().

    :param text: A duration string, e.g. "10:15", "10:15.300", "1:10:15"
    :return: The duration in seconds, rounded to an integer
    """
    total_ms = parse_duration_to_milliseconds(text)
    return round(total_ms / 1000.0)

159 

160 

def mp3_duration_ms(filename: Path) -> int:
    """
    Returns the length of the mp3 in ms

    :param filename: Path of the mp3 file
    :return: The audio length in milliseconds, rounded to an integer
    :raises ValueError: If no stream info is available for the file
    """
    # Ref: https://github.com/ping/odmpy/pull/3
    # eyeD3's audio length function:
    #   audiofile.info.time_secs
    # returns incorrect times due to its header computation
    # mutagen does not have this issue
    info = MP3(filename).info
    if not info:
        raise ValueError(f"Unable to parse MP3 info from: {filename}")
    length_ms = info.length * 1000
    return int(round(length_ms))

173 

174 

175# From django 

def slugify(value: str, allow_unicode: bool = False) -> str:
    """
    Convert to ASCII if 'allow_unicode' is False. Convert spaces to hyphens.
    Remove characters that aren't alphanumerics, underscores, or hyphens.
    Convert to lowercase. Also strip leading and trailing whitespace.

    :param value: The text to convert
    :param allow_unicode: Keep non-ASCII word characters when True
    :return: The slug
    """
    if allow_unicode:
        normalized = unicodedata.normalize("NFKC", value)
        flags = re.U
    else:
        # decompose accented characters, then drop whatever is not ASCII
        decomposed = unicodedata.normalize("NFKD", value)
        normalized = decomposed.encode("ascii", "ignore").decode("ascii")
        flags = 0

    # drop disallowed characters, then trim surrounding whitespace and lowercase
    cleaned = re.sub(r"[^\w\s-]", "", normalized, flags=flags).strip().lower()
    # collapse each run of hyphens/whitespace into a single hyphen
    return re.sub(r"[-\s]+", "-", cleaned, flags=flags)