import pytest

from ocrag.chunker import chunk_code


@pytest.fixture
def sample_python_code() -> str:
    """Return a small Python snippet with one function and one class."""
    return """def hello():
    print('Hello, World!')


class Test:
    def method(self):
        pass"""


@pytest.fixture
def sample_markdown_content() -> str:
    """Return a small Markdown document with a title and one subtitle."""
    return """# Title

Paragraph 1

## Subtitle

Paragraph 2"""


def test_chunk_code_python(sample_python_code: str) -> None:
    """A ``.py`` file yields chunks tagged with the ``python`` language.

    The first chunk is expected to contain the ``hello`` function definition.
    """
    chunks = chunk_code(sample_python_code, "test.py")

    assert len(chunks) > 0
    assert "def hello" in chunks[0]["text"]
    assert chunks[0]["metadata"]["language"] == "python"


def test_chunk_code_markdown(sample_markdown_content: str) -> None:
    """A ``.md`` file yields chunks tagged with the ``markdown`` language.

    The first chunk is expected to contain the top-level heading.
    """
    chunks = chunk_code(sample_markdown_content, "test.md")

    assert len(chunks) > 0
    assert "# Title" in chunks[0]["text"]
    assert chunks[0]["metadata"]["language"] == "markdown"


def test_chunk_code_text() -> None:
    """A ``.txt`` file is chunked as ``text`` and keeps its source file name."""
    content = "a" * 10000  # Large content to test text fallback
    chunks = chunk_code(content, "large.txt")

    assert len(chunks) > 0
    assert chunks[0]["metadata"]["source_file"] == "large.txt"
    assert chunks[0]["metadata"]["language"] == "text"