55import functools
66import re
77from html import unescape
8- from typing import List , Tuple
8+ from typing import List , Optional , Tuple
99
1010# Sentence delimiter: split on a period followed by any type of
1111# whitespace (space, new line, tab, etc.) or by the end of a line/string.
12- REGEX_SENTENCE_DELIMITER = re .compile (r"\.(?:\s|$)" )
12+ REGEX_SENTENCE_DELIMITER = re .compile (r"\.(?:\s|$)" , flags = re . M )
1313
1414# Matches on pattern __prefix__ at the beginning of a description
1515# or after a comma
16- REGEX_TECHDOCS_PREFIX = re .compile (r"(?:, |\A)__([\w- ]+)__" )
16+ REGEX_TECHDOCS_PREFIX = re .compile (r"(?:, |\A)__([^_ ]+)__" )
1717
1818# Matches on pattern [link title](https://.../)
1919REGEX_MARKDOWN_LINK = re .compile (r"\[(?P<text>.*?)]\((?P<link>.*?)\)" )
@@ -121,23 +121,35 @@ def get_short_description(description: str) -> str:
121121 :rtype: str
122122 """
123123
124- target_lines = description .splitlines ()
125- relevant_lines = None
126-
127- for i , line in enumerate (target_lines ):
124+ def __simplify (sentence : str ) -> Optional [str ]:
128125 # Edge case for descriptions starting with a note
129- if line .lower ().startswith ("__note__" ):
130- continue
126+ if sentence .lower ().startswith ("__note__" ):
127+ return None
128+
129+ sentence = strip_techdocs_prefixes (sentence )
131130
132- relevant_lines = target_lines [i :]
133- break
131+ # Check that the sentence still has content after stripping prefixes
132+ if len (sentence ) < 2 :
133+ return None
134134
135- if relevant_lines is None :
135+ return sentence + "."
136+
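137+ # Find the first relevant sentence. NOTE(review): next() is called without a default here, so it raises StopIteration instead of returning None when no sentence qualifies; the `result is None` check below only takes effect if None is passed as next()'s default.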
138+ result = next (
139+ simplified
140+ for simplified in iter (
141+ __simplify (sentence )
142+ for sentence in REGEX_SENTENCE_DELIMITER .split (description )
143+ )
144+ if simplified is not None
145+ )
146+
147+ if result is None :
136148 raise ValueError (
137149 f"description does not contain any relevant lines: { description } " ,
138150 )
139151
140- return REGEX_SENTENCE_DELIMITER . split ( " \n " . join ( relevant_lines ), 1 )[ 0 ] + "."
152+ return result
141153
142154
143155def strip_techdocs_prefixes (description : str ) -> str :
@@ -150,11 +162,7 @@ def strip_techdocs_prefixes(description: str) -> str:
150162 :returns: The stripped description
151163 :rtype: str
152164 """
153- result_description = REGEX_TECHDOCS_PREFIX .sub (
154- "" , description .lstrip ()
155- ).lstrip ()
156-
157- return result_description
165+ return REGEX_TECHDOCS_PREFIX .sub ("" , description .lstrip ()).lstrip ()
158166
159167
160168def process_arg_description (description : str ) -> Tuple [str , str ]:
@@ -173,7 +181,6 @@ def process_arg_description(description: str) -> Tuple[str, str]:
173181 return "" , ""
174182
175183 result = get_short_description (description )
176- result = strip_techdocs_prefixes (result )
177184 result = result .replace ("\n " , " " ).replace ("\r " , " " )
178185
179186 # NOTE: Links should only be separated from Rich Markdown links
0 commit comments