Source code for langchain_community.document_loaders.parsers.language.python
import ast
from typing import Any, List
from langchain_community.document_loaders.parsers.language.code_segmenter import (
CodeSegmenter,
)
class PythonSegmenter(CodeSegmenter):
    """Code segmenter for `Python`.

    The source is parsed with :mod:`ast`. Only *top-level* function,
    async-function and class definitions are treated as segments; anything
    nested inside them belongs to the enclosing top-level definition.
    """

    def __init__(self, code: str):
        """Initialise the segmenter and index the source by physical line.

        Args:
            code: Python source text to segment.
        """
        super().__init__(code)
        self.source_lines: List[str] = self._split_lines(self.code)

    @staticmethod
    def _split_lines(code: str) -> List[str]:
        """Split ``code`` using only the line endings the Python parser counts.

        ``str.splitlines()`` additionally breaks on form feeds, vertical tabs
        and several Unicode separators. The parser does not, so using it would
        shift every index relative to ``node.lineno`` / ``node.end_lineno``
        as soon as one of those characters appears (for example a form-feed
        page break, or a line separator inside a string literal).

        Args:
            code: Source text to split.

        Returns:
            The lines of ``code`` without their line endings. As with
            ``str.splitlines()``, a trailing line ending does not produce a
            final empty line, and empty input yields an empty list.
        """
        lines = code.replace("\r\n", "\n").replace("\r", "\n").split("\n")
        # ``split`` always returns at least one item, so ``lines[-1]`` is safe.
        if lines[-1] == "":
            lines.pop()
        return lines

    def is_valid(self) -> bool:
        """Report whether the source can be parsed as Python.

        Returns:
            ``True`` if ``ast.parse`` accepts the code, ``False`` otherwise.
        """
        try:
            ast.parse(self.code)
        except (SyntaxError, ValueError):
            # Some Python versions report null bytes in the source as
            # ValueError rather than SyntaxError; both mean "not parseable".
            return False
        return True

    def _extract_code(self, node: Any) -> str:
        """Return the source text spanned by an AST node.

        Args:
            node: An AST node carrying ``lineno`` and ``end_lineno``.

        Returns:
            The lines from ``node.lineno`` through ``node.end_lineno``
            (inclusive, 1-based) joined with ``"\\n"``.
        """
        first = node.lineno - 1  # AST line numbers are 1-based
        last = node.end_lineno  # inclusive 1-based == exclusive 0-based
        return "\n".join(self.source_lines[first:last])

    def simplify_code(self) -> str:
        """Collapse each top-level definition to a one-line placeholder.

        The first line of every top-level function, async function or class
        is rewritten as ``# Code for: <original line>`` and the remaining
        lines of that definition are removed. All other lines are kept as-is.

        Returns:
            The simplified source, lines joined with ``"\\n"``.

        Raises:
            SyntaxError: If the source cannot be parsed.
        """
        tree = ast.parse(self.code)
        lines = list(self.source_lines)
        dropped = set()  # 0-based indices of definition body lines to omit

        for node in ast.iter_child_nodes(tree):
            if not isinstance(
                node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
            ):
                continue
            start = node.lineno - 1
            assert isinstance(node.end_lineno, int)
            lines[start] = f"# Code for: {lines[start]}"
            dropped.update(range(start + 1, node.end_lineno))

        return "\n".join(
            line for index, line in enumerate(lines) if index not in dropped
        )