46 lines
1.1 KiB
Python
46 lines
1.1 KiB
Python
import pytest
|
|
from ocrag.chunker import chunk_code
|
|
|
|
|
|
@pytest.fixture
def sample_python_code():
    """Provide a small Python snippet: one function followed by one class."""
    # NOTE(review): indentation inside the literal is assumed to be the
    # conventional 4 spaces per nesting level -- confirm against the repo.
    snippet = """def hello():
    print('Hello, World!')

class Test:
    def method(self):
        pass"""
    return snippet
|
|
|
|
|
|
@pytest.fixture
def sample_markdown_content():
    """Provide a short Markdown document: two headings, each with a paragraph."""
    document = """# Title

Paragraph 1

## Subtitle

Paragraph 2"""
    return document
|
|
|
|
|
|
def test_chunk_code_python(sample_python_code):
    """Check that a ``.py`` source is chunked and tagged as Python."""
    result = chunk_code(sample_python_code, "test.py")
    assert len(result) > 0

    # The first chunk must contain the leading function definition.
    first = result[0]
    assert "def hello" in first["text"]
    assert first["metadata"]["language"] == "python"
|
|
|
|
|
|
def test_chunk_code_markdown(sample_markdown_content):
    """Check that a ``.md`` source is chunked and tagged as Markdown."""
    result = chunk_code(sample_markdown_content, "test.md")
    assert len(result) > 0

    # The first chunk must contain the top-level heading.
    first = result[0]
    assert "# Title" in first["text"]
    assert first["metadata"]["language"] == "markdown"
|
|
|
|
|
|
def test_chunk_code_text():
    """Check that a large ``.txt`` input is chunked and tagged as plain text."""
    filename = "large.txt"
    payload = "a" * 10000  # Large content to test text fallback

    result = chunk_code(payload, filename)
    assert len(result) > 0

    # The first chunk's metadata must carry the source file name and language.
    first_meta = result[0]["metadata"]
    assert first_meta["source_file"] == "large.txt"
    assert first_meta["language"] == "text"
|