# ocrag/tests/unit/test_chunker.py
#
# 46 lines
# 1.1 KiB
# Python

import pytest
from ocrag.chunker import chunk_code
@pytest.fixture
def sample_python_code():
    """Return a small, syntactically valid Python snippet for chunker tests.

    The snippet holds one top-level function and one class with a single
    method. Body lines are indented so the text parses as real Python.

    Returns:
        str: The sample source, without a trailing newline.
    """
    # Spelled out line by line so the indentation inside the sample is
    # explicit and cannot be lost by re-flowing a multi-line literal.
    source_lines = (
        "def hello():",
        "    print('Hello, World!')",
        "class Test:",
        "    def method(self):",
        "        pass",
    )
    return "\n".join(source_lines)
@pytest.fixture
def sample_markdown_content():
    """Return a short Markdown sample: two headings, each with a paragraph.

    Returns:
        str: The Markdown text, without a trailing newline.
    """
    markdown_lines = ["# Title", "Paragraph 1", "## Subtitle", "Paragraph 2"]
    return "\n".join(markdown_lines)
def test_chunk_code_python(sample_python_code):
    """Check ``chunk_code`` on the Python sample passed as ``test.py``.

    Asserts that at least one chunk comes back, that the first chunk's text
    contains ``def hello``, and that its metadata language is ``"python"``.
    """
    result = chunk_code(sample_python_code, "test.py")
    assert len(result) > 0

    first_chunk = result[0]
    assert "def hello" in first_chunk["text"]
    assert first_chunk["metadata"]["language"] == "python"
def test_chunk_code_markdown(sample_markdown_content):
    """Check ``chunk_code`` on the Markdown sample passed as ``test.md``.

    Asserts that at least one chunk comes back, that the first chunk's text
    contains ``# Title``, and that its metadata language is ``"markdown"``.
    """
    result = chunk_code(sample_markdown_content, "test.md")
    assert len(result) > 0

    first_chunk = result[0]
    assert "# Title" in first_chunk["text"]
    assert first_chunk["metadata"]["language"] == "markdown"
def test_chunk_code_text():
    """Check ``chunk_code`` on a large plain-text input named ``large.txt``.

    Asserts that at least one chunk comes back and that the first chunk's
    metadata records the source file name and the language ``"text"``.
    """
    filename = "large.txt"
    # Large content to test text fallback
    payload = "a" * 10000

    result = chunk_code(payload, filename)
    assert len(result) > 0

    first_meta = result[0]["metadata"]
    assert first_meta["source_file"] == filename
    assert first_meta["language"] == "text"