"""Tests for ``ocrag.utils.is_text_file`` and the ``ocrag.gitignore`` helpers.

Every test builds its fixture files under pytest's ``tmp_path`` so nothing
outside the temporary directory is touched.
"""

import os
from pathlib import Path

import pytest

from ocrag.gitignore import GitignoreMatcher, get_gitignore_matcher, find_project_root
from ocrag.utils import is_text_file


class TestIsTextFile:
    """Expected classifications from ``is_text_file`` for various contents."""

    def test_python_file_is_text(self, tmp_path):
        """A small Python source file is reported as text."""
        f = tmp_path / "test.py"
        f.write_text("def foo():\n pass")
        assert is_text_file(str(f)) is True

    def test_binary_file_is_not_text(self, tmp_path):
        """Content that starts with NUL/control bytes is reported as non-text."""
        f = tmp_path / "test.bin"
        f.write_bytes(b"\x00\x01\x02\x03binary")
        assert is_text_file(str(f)) is False

    def test_empty_file_is_text(self, tmp_path):
        """A zero-length file is reported as text."""
        f = tmp_path / "empty.txt"
        f.write_text("")
        assert is_text_file(str(f)) is True

    def test_utf8_with_non_ascii_is_text(self, tmp_path):
        """UTF-8 encoded CJK content is reported as text."""
        f = tmp_path / "unicode.txt"
        # Pin the encoding: without it write_text() uses the locale encoding,
        # so on a non-UTF-8 locale the file would not contain UTF-8 at all
        # (or the write itself would fail) and the test would be meaningless.
        f.write_text("中文内容测试\n日本語\n한국어", encoding="utf-8")
        assert is_text_file(str(f)) is True

    def test_invalid_utf8_is_not_text(self, tmp_path):
        """A lone 0x80 byte (not valid UTF-8) makes the file non-text."""
        f = tmp_path / "invalid.txt"
        f.write_bytes(b"hello\x80world")
        assert is_text_file(str(f)) is False


class TestFindProjectRoot:
    """Expected results from ``find_project_root``."""

    def test_finds_gitignore(self, tmp_path):
        """Starting two levels below a ``.gitignore`` yields its directory."""
        (tmp_path / ".gitignore").write_text("*.pyc")
        sub = tmp_path / "src" / "deep"
        sub.mkdir(parents=True)

        found = find_project_root(sub)

        assert found == tmp_path

    def test_no_gitignore_returns_none(self, tmp_path):
        """``None`` is expected when no ``.gitignore`` was created."""
        # NOTE(review): this presumes no ancestor of tmp_path contains a
        # .gitignore either - confirm how far find_project_root searches.
        sub = tmp_path / "src"
        sub.mkdir(parents=True)

        found = find_project_root(sub)

        assert found is None


class TestGitignoreMatcher:
    """Expected ``is_ignored`` answers for common gitignore pattern forms."""

    def test_ignore_pattern(self, tmp_path):
        """A glob and a directory pattern ignore matching paths only."""
        (tmp_path / ".gitignore").write_text("*.pyc\n__pycache__/\n")
        matcher = GitignoreMatcher(tmp_path)

        assert matcher.is_ignored(tmp_path / "file.pyc") is True
        assert matcher.is_ignored(tmp_path / "file.py") is False
        assert matcher.is_ignored(tmp_path / "__pycache__" / "cache.pyc") is True

    def test_negation_pattern(self, tmp_path):
        """A ``!`` pattern re-includes a file matched by an earlier glob."""
        (tmp_path / ".gitignore").write_text("*.log\n!important.log\n")
        matcher = GitignoreMatcher(tmp_path)

        assert matcher.is_ignored(tmp_path / "debug.log") is True
        assert matcher.is_ignored(tmp_path / "important.log") is False

    def test_subdirectory_gitignore(self, tmp_path):
        """Root patterns still apply inside a directory with its own file."""
        (tmp_path / ".gitignore").write_text("*.py\n")
        sub_dir = tmp_path / "src"
        sub_dir.mkdir()
        (sub_dir / ".gitignore").write_text("test_*.py\n")
        matcher = GitignoreMatcher(tmp_path)

        assert matcher.is_ignored(tmp_path / "main.py") is True
        assert matcher.is_ignored(tmp_path / "utils.py") is True
        assert matcher.is_ignored(sub_dir / "helper.py") is True
        assert matcher.is_ignored(sub_dir / "test_main.py") is True

    def test_anchored_pattern(self, tmp_path):
        """A leading ``/`` restricts the match to the root directory."""
        (tmp_path / ".gitignore").write_text("/build\n")
        matcher = GitignoreMatcher(tmp_path)

        assert matcher.is_ignored(tmp_path / "build" / "output") is True
        assert matcher.is_ignored(tmp_path / "src" / "build" / "out") is False

    def test_ignore_directory_ending_slash(self, tmp_path):
        """A trailing-slash pattern ignores that directory at any depth."""
        (tmp_path / ".gitignore").write_text("node_modules/\n")
        matcher = GitignoreMatcher(tmp_path)

        assert matcher.is_ignored(tmp_path / "node_modules" / "package.json") is True
        assert matcher.is_ignored(tmp_path / "other" / "node_modules" / "file") is True

    def test_get_matcher_returns_none_when_no_gitignore(self, tmp_path):
        """``get_gitignore_matcher`` yields ``None`` for a nonexistent path."""
        result = get_gitignore_matcher(tmp_path / "nonexistent")
        assert result is None

    def test_get_matcher_returns_matcher(self, tmp_path):
        """``get_gitignore_matcher`` yields a usable matcher when a file exists."""
        (tmp_path / ".gitignore").write_text("*.pyc")

        result = get_gitignore_matcher(tmp_path)

        assert result is not None
        assert result.is_ignored(tmp_path / "file.pyc") is True

    def test_comments_ignored(self, tmp_path):
        """The ``*.log`` pattern still applies when surrounded by ``#`` lines."""
        (tmp_path / ".gitignore").write_text(
            "# this is a comment\n*.log\n # another comment\n"
        )
        matcher = GitignoreMatcher(tmp_path)

        assert matcher.is_ignored(tmp_path / "debug.log") is True

    def test_whitespace_lines(self, tmp_path):
        """Leading whitespace in a pattern is kept as part of the pattern."""
        (tmp_path / ".gitignore").write_text(" *.log\n")
        matcher = GitignoreMatcher(tmp_path)

        # Only a name that itself starts with the space is expected to match.
        assert matcher.is_ignored(tmp_path / " debug.log") is True
        assert matcher.is_ignored(tmp_path / "debug.log") is False

    def test_double_star_pattern(self, tmp_path):
        """``**/`` matches at the root and at every nesting depth."""
        (tmp_path / ".gitignore").write_text("**/*.pyc\n")
        matcher = GitignoreMatcher(tmp_path)

        assert matcher.is_ignored(tmp_path / "a.pyc") is True
        assert matcher.is_ignored(tmp_path / "src" / "a.pyc") is True
        assert matcher.is_ignored(tmp_path / "src" / "deep" / "a.pyc") is True

    def test_no_gitignore_file(self, tmp_path):
        """With no ``.gitignore`` present, nothing is reported as ignored."""
        matcher = GitignoreMatcher(tmp_path)

        assert matcher.is_ignored(tmp_path / "file.py") is False
        assert matcher.is_ignored(tmp_path / "anything") is False