# ocrag/tests/unit/test_utils.py
#
# 127 lines
# 4.9 KiB
# Python

import os
import pytest
from pathlib import Path
from ocrag.gitignore import GitignoreMatcher, get_gitignore_matcher, find_project_root
from ocrag.utils import is_text_file
class TestIsTextFile:
    """Tests for ``is_text_file`` using small files created under ``tmp_path``."""

    def test_python_file_is_text(self, tmp_path):
        """A file holding ASCII Python source is reported as text."""
        f = tmp_path / "test.py"
        f.write_text("def foo():\n pass")
        assert is_text_file(str(f)) is True

    def test_binary_file_is_not_text(self, tmp_path):
        """A file whose bytes include NUL and other control bytes is not text."""
        f = tmp_path / "test.bin"
        f.write_bytes(b"\x00\x01\x02\x03binary")
        assert is_text_file(str(f)) is False

    def test_empty_file_is_text(self, tmp_path):
        """A zero-length file is reported as text."""
        f = tmp_path / "empty.txt"
        f.write_text("")
        assert is_text_file(str(f)) is True

    def test_utf8_with_non_ascii_is_text(self, tmp_path):
        """UTF-8 encoded non-ASCII (CJK) content is reported as text."""
        f = tmp_path / "unicode.txt"
        # Pin the encoding: without it write_text() uses the locale's
        # preferred encoding, so on a non-UTF-8 locale the write could fail
        # or produce bytes that are not UTF-8, defeating the test's purpose.
        f.write_text("中文内容测试\n日本語\n한국어", encoding="utf-8")
        assert is_text_file(str(f)) is True

    def test_invalid_utf8_is_not_text(self, tmp_path):
        """A lone 0x80 byte (not valid UTF-8) makes the file non-text."""
        f = tmp_path / "invalid.txt"
        f.write_bytes(b"hello\x80world")
        assert is_text_file(str(f)) is False
class TestFindProjectRoot:
    """Tests for ``find_project_root`` called from a nested directory."""

    def test_finds_gitignore(self, tmp_path):
        """The directory holding ``.gitignore`` is returned for a deep child."""
        marker = tmp_path / ".gitignore"
        marker.write_text("*.pyc")
        nested = tmp_path / "src" / "deep"
        nested.mkdir(parents=True)
        assert find_project_root(nested) == tmp_path

    def test_no_gitignore_returns_none(self, tmp_path):
        """``None`` is returned when no ``.gitignore`` was created in the tree."""
        # NOTE(review): presumably the search walks up through parents of
        # tmp_path as well; if so this passes only when no ancestor directory
        # contains a .gitignore -- confirm against find_project_root.
        nested = tmp_path / "src"
        nested.mkdir(parents=True)
        root = find_project_root(nested)
        assert root is None
class TestGitignoreMatcher:
    """Tests for ``GitignoreMatcher.is_ignored`` and ``get_gitignore_matcher``.

    Each test writes a ``.gitignore`` under ``tmp_path`` (where needed) and
    queries paths beneath it; the queried paths are not created on disk.
    """

    def test_ignore_pattern(self, tmp_path):
        """``*.pyc`` and ``__pycache__/`` rules ignore matching paths only."""
        rules_file = tmp_path / ".gitignore"
        rules_file.write_text("*.pyc\n__pycache__/\n")
        gi = GitignoreMatcher(tmp_path)
        assert gi.is_ignored(tmp_path / "file.pyc") is True
        assert gi.is_ignored(tmp_path / "file.py") is False
        cached = tmp_path / "__pycache__" / "cache.pyc"
        assert gi.is_ignored(cached) is True

    def test_negation_pattern(self, tmp_path):
        """A ``!important.log`` rule re-includes a file matched by ``*.log``."""
        rules_file = tmp_path / ".gitignore"
        rules_file.write_text("*.log\n!important.log\n")
        gi = GitignoreMatcher(tmp_path)
        assert gi.is_ignored(tmp_path / "debug.log") is True
        assert gi.is_ignored(tmp_path / "important.log") is False

    def test_subdirectory_gitignore(self, tmp_path):
        """Root and ``src/`` rule files are both in effect for queried paths."""
        src = tmp_path / "src"
        src.mkdir()
        (tmp_path / ".gitignore").write_text("*.py\n")
        (src / ".gitignore").write_text("test_*.py\n")
        gi = GitignoreMatcher(tmp_path)
        # Files directly under the root.
        assert gi.is_ignored(tmp_path / "main.py") is True
        assert gi.is_ignored(tmp_path / "utils.py") is True
        # Files under the subdirectory that carries its own rule file.
        assert gi.is_ignored(src / "helper.py") is True
        assert gi.is_ignored(src / "test_main.py") is True

    def test_anchored_pattern(self, tmp_path):
        """``/build`` matches only the top-level ``build`` directory."""
        rules_file = tmp_path / ".gitignore"
        rules_file.write_text("/build\n")
        gi = GitignoreMatcher(tmp_path)
        top_level = tmp_path / "build" / "output"
        nested = tmp_path / "src" / "build" / "out"
        assert gi.is_ignored(top_level) is True
        assert gi.is_ignored(nested) is False

    def test_ignore_directory_ending_slash(self, tmp_path):
        """``node_modules/`` ignores contents of that directory at any depth."""
        rules_file = tmp_path / ".gitignore"
        rules_file.write_text("node_modules/\n")
        gi = GitignoreMatcher(tmp_path)
        top_level = tmp_path / "node_modules" / "package.json"
        nested = tmp_path / "other" / "node_modules" / "file"
        assert gi.is_ignored(top_level) is True
        assert gi.is_ignored(nested) is True

    def test_get_matcher_returns_none_when_no_gitignore(self, tmp_path):
        """``get_gitignore_matcher`` yields ``None`` for a missing directory."""
        missing = tmp_path / "nonexistent"
        assert get_gitignore_matcher(missing) is None

    def test_get_matcher_returns_matcher(self, tmp_path):
        """``get_gitignore_matcher`` yields a usable matcher when rules exist."""
        rules_file = tmp_path / ".gitignore"
        rules_file.write_text("*.pyc")
        gi = get_gitignore_matcher(tmp_path)
        assert gi is not None
        assert gi.is_ignored(tmp_path / "file.pyc") is True

    def test_comments_ignored(self, tmp_path):
        """A ``*.log`` rule placed between ``#`` lines still applies."""
        content = "# this is a comment\n*.log\n # another comment\n"
        (tmp_path / ".gitignore").write_text(content)
        gi = GitignoreMatcher(tmp_path)
        assert gi.is_ignored(tmp_path / "debug.log") is True

    def test_whitespace_lines(self, tmp_path):
        """A pattern with a leading space matches only names that share it."""
        rules_file = tmp_path / ".gitignore"
        rules_file.write_text(" *.log\n")
        gi = GitignoreMatcher(tmp_path)
        assert gi.is_ignored(tmp_path / " debug.log") is True
        assert gi.is_ignored(tmp_path / "debug.log") is False

    def test_double_star_pattern(self, tmp_path):
        """``**/*.pyc`` matches at the root and at every nesting depth."""
        rules_file = tmp_path / ".gitignore"
        rules_file.write_text("**/*.pyc\n")
        gi = GitignoreMatcher(tmp_path)
        assert gi.is_ignored(tmp_path / "a.pyc") is True
        assert gi.is_ignored(tmp_path / "src" / "a.pyc") is True
        assert gi.is_ignored(tmp_path / "src" / "deep" / "a.pyc") is True

    def test_no_gitignore_file(self, tmp_path):
        """With no rule file written, nothing under ``tmp_path`` is ignored."""
        gi = GitignoreMatcher(tmp_path)
        assert gi.is_ignored(tmp_path / "file.py") is False
        assert gi.is_ignored(tmp_path / "anything") is False