127 lines
4.9 KiB
Python
127 lines
4.9 KiB
Python
import os
|
|
import pytest
|
|
from pathlib import Path
|
|
from ocrag.gitignore import GitignoreMatcher, get_gitignore_matcher, find_project_root
|
|
from ocrag.utils import is_text_file
|
|
|
|
|
|
class TestIsTextFile:
    """Tests for ``is_text_file`` using small files created under ``tmp_path``."""

    def test_python_file_is_text(self, tmp_path):
        """A short Python source file is reported as text."""
        f = tmp_path / "test.py"
        f.write_text("def foo():\n pass")
        assert is_text_file(str(f)) is True

    def test_binary_file_is_not_text(self, tmp_path):
        """A file whose content starts with NUL/control bytes is not text."""
        f = tmp_path / "test.bin"
        f.write_bytes(b"\x00\x01\x02\x03binary")
        assert is_text_file(str(f)) is False

    def test_empty_file_is_text(self, tmp_path):
        """A zero-length file is reported as text."""
        f = tmp_path / "empty.txt"
        f.write_text("")
        assert is_text_file(str(f)) is True

    def test_utf8_with_non_ascii_is_text(self, tmp_path):
        """UTF-8 encoded CJK and Hangul content is reported as text."""
        f = tmp_path / "unicode.txt"
        # Encode explicitly: without ``encoding`` write_text() falls back to
        # the locale encoding, which on non-UTF-8 platforms either raises
        # UnicodeEncodeError or writes bytes that are not UTF-8 at all.
        f.write_text("中文内容测试\n日本語\n한국어", encoding="utf-8")
        assert is_text_file(str(f)) is True

    def test_invalid_utf8_is_not_text(self, tmp_path):
        """Bytes containing a stray 0x80 (not valid UTF-8) are not text."""
        f = tmp_path / "invalid.txt"
        f.write_bytes(b"hello\x80world")
        assert is_text_file(str(f)) is False
|
|
|
|
|
|
class TestFindProjectRoot:
    """Checks on ``find_project_root`` using directory trees under ``tmp_path``."""

    def test_finds_gitignore(self, tmp_path):
        """Starting two levels below a ``.gitignore`` yields its directory."""
        marker = tmp_path / ".gitignore"
        marker.write_text("*.pyc")

        nested = tmp_path / "src" / "deep"
        nested.mkdir(parents=True)

        assert find_project_root(nested) == tmp_path

    def test_no_gitignore_returns_none(self, tmp_path):
        """With no ``.gitignore`` created by the test, the result is ``None``."""
        start = tmp_path / "src"
        start.mkdir(parents=True)

        assert find_project_root(start) is None
|
|
|
|
|
|
class TestGitignoreMatcher:
    """Behavioural checks for ``GitignoreMatcher`` and ``get_gitignore_matcher``.

    Each test writes its own ``.gitignore`` content under ``tmp_path`` and
    then asserts the exact boolean returned by ``is_ignored`` for a handful
    of candidate paths.
    """

    def test_ignore_pattern(self, tmp_path):
        """A glob and a directory pattern ignore only the matching paths."""
        rules = tmp_path / ".gitignore"
        rules.write_text("*.pyc\n__pycache__/\n")
        m = GitignoreMatcher(tmp_path)

        expectations = (
            (tmp_path / "file.pyc", True),
            (tmp_path / "file.py", False),
            (tmp_path / "__pycache__" / "cache.pyc", True),
        )
        for candidate, expected in expectations:
            assert m.is_ignored(candidate) is expected

    def test_negation_pattern(self, tmp_path):
        """A ``!`` rule re-includes a file matched by an earlier glob."""
        rules = tmp_path / ".gitignore"
        rules.write_text("*.log\n!important.log\n")
        m = GitignoreMatcher(tmp_path)

        expectations = (
            (tmp_path / "debug.log", True),
            (tmp_path / "important.log", False),
        )
        for candidate, expected in expectations:
            assert m.is_ignored(candidate) is expected

    def test_subdirectory_gitignore(self, tmp_path):
        """Paths stay ignored when a nested ``.gitignore`` is also present."""
        root_rules = tmp_path / ".gitignore"
        root_rules.write_text("*.py\n")
        src = tmp_path / "src"
        src.mkdir()
        nested_rules = src / ".gitignore"
        nested_rules.write_text("test_*.py\n")
        m = GitignoreMatcher(tmp_path)

        ignored_paths = (
            tmp_path / "main.py",
            tmp_path / "utils.py",
            src / "helper.py",
            src / "test_main.py",
        )
        for candidate in ignored_paths:
            assert m.is_ignored(candidate) is True

    def test_anchored_pattern(self, tmp_path):
        """``/build`` matches ``build`` at the root but not ``src/build``."""
        rules = tmp_path / ".gitignore"
        rules.write_text("/build\n")
        m = GitignoreMatcher(tmp_path)

        at_root = tmp_path / "build" / "output"
        below_src = tmp_path / "src" / "build" / "out"
        assert m.is_ignored(at_root) is True
        assert m.is_ignored(below_src) is False

    def test_ignore_directory_ending_slash(self, tmp_path):
        """``node_modules/`` matches both at the root and under ``other/``."""
        rules = tmp_path / ".gitignore"
        rules.write_text("node_modules/\n")
        m = GitignoreMatcher(tmp_path)

        ignored_paths = (
            tmp_path / "node_modules" / "package.json",
            tmp_path / "other" / "node_modules" / "file",
        )
        for candidate in ignored_paths:
            assert m.is_ignored(candidate) is True

    def test_get_matcher_returns_none_when_no_gitignore(self, tmp_path):
        """The factory returns ``None`` for a path that was never created."""
        missing = tmp_path / "nonexistent"
        assert get_gitignore_matcher(missing) is None

    def test_get_matcher_returns_matcher(self, tmp_path):
        """The factory returns a usable matcher when ``.gitignore`` exists."""
        rules = tmp_path / ".gitignore"
        rules.write_text("*.pyc")

        m = get_gitignore_matcher(tmp_path)
        assert m is not None
        assert m.is_ignored(tmp_path / "file.pyc") is True

    def test_comments_ignored(self, tmp_path):
        """``*.log`` still applies when surrounded by ``#`` lines."""
        content = "# this is a comment\n*.log\n # another comment\n"
        rules = tmp_path / ".gitignore"
        rules.write_text(content)

        m = GitignoreMatcher(tmp_path)
        assert m.is_ignored(tmp_path / "debug.log") is True

    def test_whitespace_lines(self, tmp_path):
        """A leading space in a pattern is matched literally, not stripped."""
        rules = tmp_path / ".gitignore"
        rules.write_text(" *.log\n")
        m = GitignoreMatcher(tmp_path)

        expectations = (
            (tmp_path / " debug.log", True),
            (tmp_path / "debug.log", False),
        )
        for candidate, expected in expectations:
            assert m.is_ignored(candidate) is expected

    def test_double_star_pattern(self, tmp_path):
        """``**/*.pyc`` matches at the root and at two deeper levels."""
        rules = tmp_path / ".gitignore"
        rules.write_text("**/*.pyc\n")
        m = GitignoreMatcher(tmp_path)

        ignored_paths = (
            tmp_path / "a.pyc",
            tmp_path / "src" / "a.pyc",
            tmp_path / "src" / "deep" / "a.pyc",
        )
        for candidate in ignored_paths:
            assert m.is_ignored(candidate) is True

    def test_no_gitignore_file(self, tmp_path):
        """With no ``.gitignore`` written, nothing is reported as ignored."""
        m = GitignoreMatcher(tmp_path)

        kept_paths = (
            tmp_path / "file.py",
            tmp_path / "anything",
        )
        for candidate in kept_paths:
            assert m.is_ignored(candidate) is False
|