Skip to content

Commit b7138e7

Browse files
chrisdpurcell and claude committed
fix: handle UTF-8-SIG in convert_to_utf8, add status_changed on error
- Extend the early-return guard to include 'utf8sig' so chardet's UTF-8-SIG label (BOM files) is treated as already UTF-8.
- Emit status_changed("Error converting file") in the except block, consistent with save_file and load_file error paths.
- Remove redundant inline TextDocument imports from TestConvertToUtf8 (import already exists at module level).
- Upgrade test_no_op_when_no_document and test_no_op_when_already_utf8 to assert the expected status_changed message is emitted.
- Add test_no_op_when_utf8_with_bom to cover the UTF-8-SIG guard.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 283f2eb commit b7138e7

2 files changed

Lines changed: 20 additions & 9 deletions

File tree

src/viewmodels/main_viewmodel.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -179,8 +179,10 @@ def convert_to_utf8(self, current_text: str) -> None:
179179
self.status_changed.emit("No document loaded")
180180
return
181181
# Normalise: strip dashes and lowercase so 'UTF-8', 'utf-8', 'utf8' all match.
182+
# UTF-8-SIG is the chardet name for UTF-8 with a BOM — treat it as already UTF-8
183+
# to avoid needlessly re-saving BOM files.
182184
current_encoding = self._current_document.encoding.lower().replace("-", "")
183-
if current_encoding in ("utf8",):
185+
if current_encoding in {"utf8", "utf8sig"}:
184186
self.status_changed.emit("File is already UTF-8")
185187
return
186188
doc = TextDocument(
@@ -198,3 +200,4 @@ def convert_to_utf8(self, current_text: str) -> None:
198200
msg = f"Cannot convert file: {e}"
199201
logger.error(msg)
200202
self.error_occurred.emit(msg)
203+
self.status_changed.emit("Error converting file")

tests/unit/test_main_viewmodel.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,6 @@ def test_replace_all_uses_current_text_when_provided(self, vm, qtbot):
153153
class TestConvertToUtf8:
154154
def test_saves_file_with_utf8_encoding(self, vm, mock_file_svc, qtbot):
155155
"""convert_to_utf8 must call save_file with encoding='utf-8'."""
156-
from src.models.text_document import TextDocument
157156
mock_file_svc.open_file.return_value = TextDocument(
158157
filepath="/tmp/latin.txt", content="caf\u00e9", encoding="ISO-8859-1"
159158
)
@@ -165,7 +164,6 @@ def test_saves_file_with_utf8_encoding(self, vm, mock_file_svc, qtbot):
165164

166165
def test_emits_encoding_detected_utf8(self, vm, mock_file_svc, qtbot):
167166
"""convert_to_utf8 must emit encoding_detected('utf-8')."""
168-
from src.models.text_document import TextDocument
169167
mock_file_svc.open_file.return_value = TextDocument(
170168
filepath="/tmp/latin.txt", content="caf\u00e9", encoding="ISO-8859-1"
171169
)
@@ -176,7 +174,6 @@ def test_emits_encoding_detected_utf8(self, vm, mock_file_svc, qtbot):
176174

177175
def test_emits_file_saved(self, vm, mock_file_svc, qtbot):
178176
"""convert_to_utf8 must emit file_saved after a successful save."""
179-
from src.models.text_document import TextDocument
180177
mock_file_svc.open_file.return_value = TextDocument(
181178
filepath="/tmp/latin.txt", content="caf\u00e9", encoding="ISO-8859-1"
182179
)
@@ -187,21 +184,32 @@ def test_emits_file_saved(self, vm, mock_file_svc, qtbot):
187184

188185
def test_no_op_when_already_utf8(self, vm, mock_file_svc, qtbot):
189186
"""convert_to_utf8 must not save when encoding is already utf-8."""
190-
from src.models.text_document import TextDocument
191187
mock_file_svc.open_file.return_value = TextDocument(
192188
filepath="/tmp/utf8.txt", content="hello", encoding="utf-8"
193189
)
194190
vm.load_file("/tmp/utf8.txt")
195-
vm.convert_to_utf8("hello")
191+
with qtbot.waitSignal(vm.status_changed, timeout=1000) as blocker:
192+
vm.convert_to_utf8("hello")
196193
mock_file_svc.save_file.assert_not_called()
194+
assert "already" in blocker.args[0].lower()
197195

198196
def test_no_op_when_no_document(self, vm, qtbot):
199-
"""convert_to_utf8 with no loaded document must be silent."""
200-
vm.convert_to_utf8("some text") # must not raise
197+
"""convert_to_utf8 with no loaded document emits status_changed."""
198+
with qtbot.waitSignal(vm.status_changed, timeout=1000) as blocker:
199+
vm.convert_to_utf8("some text")
200+
assert "no document" in blocker.args[0].lower()
201+
202+
def test_no_op_when_utf8_with_bom(self, vm, mock_file_svc, qtbot):
203+
"""UTF-8-SIG (BOM) files must be treated as already UTF-8."""
204+
mock_file_svc.open_file.return_value = TextDocument(
205+
filepath="/tmp/bom.txt", content="hello", encoding="UTF-8-SIG"
206+
)
207+
vm.load_file("/tmp/bom.txt")
208+
vm.convert_to_utf8("hello")
209+
mock_file_svc.save_file.assert_not_called()
201210

202211
def test_emits_error_on_save_failure(self, vm, mock_file_svc, qtbot):
203212
"""convert_to_utf8 must emit error_occurred when save_file raises."""
204-
from src.models.text_document import TextDocument
205213
mock_file_svc.open_file.return_value = TextDocument(
206214
filepath="/tmp/latin.txt", content="caf\u00e9", encoding="ISO-8859-1"
207215
)

0 commit comments

Comments
 (0)