# Source code for xdoctest.core

"""
Core methods used by xdoctest runner and plugin code to statically extract
doctests from a module or package.


The following is a glossary of terms and jargon used in this repo.

* callname - the name of a callable function, method, class etc... e.g.
  ``myfunc``, ``MyClass``, or ``MyClass.some_method``.

* got / want - a test that produces stdout or a value to check. Whatever is
  produced is what you "got" and whatever is expected is what you "want".
  See :mod:`xdoctest.checker` for more details.

* directives - special in-doctest comments that change the behavior of the
  doctests at runtime. See :mod:`xdoctest.directive` for more details.

* chevrons - the three chevrons (``>>> ``) or right angle brackets are the
    standard prefix for a doctest, also referred to as a PS1 line in the
    parser.

* zero-args - a function that can be called without any arguments.

* freeform style - This is the term used to refer to a doctest that could be
    anywhere in the docstring. The alternative is structured doctests where
    they are only expected in known positions like in "Example blocks" for
    google and numpy style docstrings.

* TODO - complete this list (Make an issue or PR if there is any term you don't
    immediately understand!).
"""
import sys
import textwrap
import warnings
import itertools as it
import types
from os.path import exists
from fnmatch import fnmatch
from xdoctest import dynamic_analysis
from xdoctest import static_analysis
from xdoctest import parser
from xdoctest import exceptions
from xdoctest import doctest_example
from xdoctest import utils
from xdoctest.docstr import docscrape_google
from xdoctest.utils import util_import
from xdoctest import global_state


# Names of the docstring styles accepted by the ``style`` argument of the
# parsing functions in this module. ``parse_doctestables`` validates against
# this list and the "Unknown style" error messages report it.
DOCTEST_STYLES = [
    'freeform',
    'google',
    'auto',
    # 'numpy',  # TODO
]

# NOTE(review): not referenced anywhere in this module; presumably read by an
# external stub-generation tool to add imports to generated stubs -- confirm.
__docstubs__ = """
import xdoctest.doctest_example
"""


def parse_freeform_docstr_examples(docstr, callname=None, modpath=None,
                                   lineno=1, fpath=None, asone=True):
    r"""
    Finds free-form doctests in a docstring. This is similar to the original
    doctests because these tests do not require a google/numpy style header.

    Some care is taken to avoid enabling tests that look like disabled google
    doctests or scripts.

    Args:
        docstr (str): an extracted docstring

        callname (str | None):
            the name of the callable (e.g. function, class, or method)
            that this docstring belongs to.

        modpath (str | PathLike | None):
            original module the docstring is from

        lineno (int):
            the line number (starting from 1) of the docstring.  i.e. if you
            were to go to this line number in the source file the starting
            quotes of the docstr would be on this line. Defaults to 1.

        fpath (str | PathLike | None):
            the file that the docstring is from (if the file was not a module,
            needed for backwards compatibility)

        asone (bool):
            if False doctests are broken into multiple examples based on
            spacing, otherwise they are executed as a single unit.
            Defaults to True.

    Yields:
        xdoctest.doctest_example.DocTest : doctest object

    Raises:
        xdoctest.exceptions.DoctestParseError: if an error occurs in parsing

    CommandLine:
        python -m xdoctest.core parse_freeform_docstr_examples

    Example:
        >>> # TODO: move this to unit tests and make the doctest simpler
        >>> from xdoctest import core
        >>> from xdoctest import utils
        >>> docstr = utils.codeblock(
        >>>     '''
        >>>     freeform
        >>>     >>> doctest
        >>>     >>> hasmultilines
        >>>     whoppie
        >>>     >>> 'but this is the same doctest'
        >>>
        >>>     >>> secondone
        >>>
        >>>     Script:
        >>>         >>> 'special case, dont parse me'
        >>>
        >>>     DisableDoctest:
        >>>         >>> 'special case, dont parse me'
        >>>         want
        >>>
        >>>     AnythingElse:
        >>>         >>> 'general case, parse me'
        >>>         want
        >>>     ''')
        >>> examples = list(parse_freeform_docstr_examples(docstr, asone=True))
        >>> assert len(examples) == 1
        >>> examples = list(parse_freeform_docstr_examples(docstr, asone=False))
        >>> assert len(examples) == 3
    """

    def doctest_from_parts(parts, num, curr_offset):
        """
        Build one DocTest from already-parsed ``parts``.

        The source text is re-assembled from each part's original lines (plus
        the lines of its "want" when it has one) and dedented. The resulting
        example is positioned at ``lineno + curr_offset`` and re-uses
        ``parts`` so the text is not parsed a second time.
        """
        # FIXME: this will cause line numbers to become misaligned
        nested = [
            p.orig_lines
            if p.want is None else
            p.orig_lines + p.want.splitlines()
            for p in parts
        ]
        docsrc = '\n'.join(list(it.chain.from_iterable(nested)))
        docsrc = textwrap.dedent(docsrc)

        example = doctest_example.DocTest(docsrc, modpath=modpath,
                                          callname=callname, num=num,
                                          lineno=lineno + curr_offset,
                                          fpath=fpath)
        # rebase the offsets relative to the test lineno (ie start at 0)
        # NOTE: this mutates the part objects in place.
        unoffset = parts[0].line_offset
        for p in parts:
            p.line_offset -= unoffset
        # We've already parsed the parts, so we dont need to do it again
        example._parts = parts
        return example

    if global_state.DEBUG_CORE:  # nocover
        print('Parsing docstring for callname={} in modpath={}'.format(
            callname, modpath))

    respect_google_headers = True
    if respect_google_headers:  # pragma: nobranch
        # TODO: make configurable
        # When in freeform mode we still try to respect google doctest patterns
        # that prevent a test from being run.
        special_skip_patterns = [
            'DisableDoctest:',
            'DisableExample:',
            'SkipDoctest:',
            'Ignore:',
            'Script:',
            'Benchmark:',
            'Sympy:',
        ]
    else:
        special_skip_patterns = []  # nocover
    # Lower-cased tuple form so it can be handed directly to str.endswith
    special_skip_patterns_ = tuple([
        p.lower() for p in special_skip_patterns
    ])

    def _start_ignoring(prev):
        """
        Truthy when ``prev`` is a plaintext part whose stripped, lower-cased
        text ends with one of the special skip headers.
        """
        return (special_skip_patterns_ and
                isinstance(prev, str) and
                prev.strip().lower().endswith(special_skip_patterns_))

    # parse into doctest and plaintext parts
    info = dict(callname=callname, modpath=modpath, lineno=lineno, fpath=fpath)
    all_parts = list(parser.DoctestParser().parse(docstr, info))

    curr_parts = []     # doctest parts collected for the example being built
    curr_offset = 0     # line offset of that example relative to ``lineno``
    num = 0             # index of the example being built
    prev_part = None    # the part seen on the previous iteration
    ignoring = False    # True while inside a block introduced by a skip header

    for part in all_parts:
        if isinstance(part, str):
            # Part is a plaintext
            if asone:
                # Lump all doctest parts into one example
                # Text only advances the offset until the first doctest part
                # has been collected.
                if not curr_parts:
                    curr_offset += part.count('\n') + 1
            else:  # nocover
                if curr_parts:
                    # Group the current parts into a single doctest
                    example = doctest_from_parts(curr_parts, num, curr_offset)
                    yield example
                    # Initialize empty parts for a new doctest
                    curr_offset += sum(p.n_lines for p in curr_parts)
                    num += 1
                    curr_parts = []
                curr_offset += part.count('\n') + 1
            # stop ignoring
            ignoring = False
        else:
            # If the previous part was text-based, and matches a special skip
            # ignore pattern then ignore all tests until a new doctest block
            # begins. (different doctest blocks are separated by plaintext)
            if ignoring or _start_ignoring(prev_part):
                ignoring = True
                # Skipped parts are not collected, but their lines still count
                # towards the offset (in asone mode only before the first
                # collected part).
                if asone:
                    if not curr_parts:
                        curr_offset += part.n_lines
                else:
                    curr_offset += part.n_lines
            else:
                # Append part to the current parts
                curr_parts.append(part)
        prev_part = part
    if curr_parts:
        # Group remaining parts into the final doctest
        example = doctest_from_parts(curr_parts, num, curr_offset)
        yield example
def parse_google_docstr_examples(docstr, callname=None, modpath=None, lineno=1,
                                 fpath=None, eager_parse=True):
    """
    Generate doctest examples from the Google-style blocks of a docstring.

    Only blocks whose label starts with ``Example``, ``Doctest``, ``Script``
    or ``Benchmark`` are turned into examples.

    Args:
        docstr (str): an extracted docstring

        callname (str | None):
            the name of the callable (e.g. function, class, or method)
            that this docstring belongs to.

        modpath (str | PathLike | None):
            original module the docstring is from

        lineno (int):
            the line number (starting from 1) of the docstring.  i.e. if you
            were to go to this line number in the source file the starting
            quotes of the docstr would be on this line. Defaults to 1.

        fpath (str | PathLike | None):
            the file that the docstring is from (if the file was not a module,
            needed for backwards compatibility)

        eager_parse (bool):
            if True eagerly evaluate the parser inside the google example
            blocks. Defaults to True.

    Yields:
        xdoctest.doctest_example.DocTest : doctest object

    Raises:
        xdoctest.exceptions.MalformedDocstr: if an error occurs in finding
            google blocks
        xdoctest.exceptions.DoctestParseError: if an error occurs in parsing
    """
    try:
        all_blocks = docscrape_google.split_google_docblocks(docstr)
    except exceptions.MalformedDocstr:
        # Print a hint for the user, then let the error propagate.
        print('ERROR PARSING {} GOOGLE BLOCKS IN {} ON line {}'.format(
            callname, modpath, lineno))
        print('Did you forget to make a docstr with newlines raw?')
        raise

    testable_prefixes = ('Example', 'Doctest', 'Script', 'Benchmark')
    testable_blocks = [
        (label, block) for label, block in all_blocks
        if label.startswith(testable_prefixes)
    ]

    for index, (label, (source, label_offset)) in enumerate(testable_blocks):
        # The offset locates the block label; the body of the block always
        # starts on the line after it, hence the extra one.
        example = doctest_example.DocTest(
            source, modpath, callname, index,
            lineno=lineno + label_offset + 1, fpath=fpath, block_type=label)
        if eager_parse:
            # parse on the fly to be consistent with freeform?
            example._parse()
        yield example
def parse_auto_docstr_examples(docstr, *args, **kwargs):
    """
    Yield google-style examples when the docstring has any, otherwise fall
    back to freeform parsing.

    An error raised by the google parser before it produced any example is
    discarded and freeform parsing is attempted instead. An error raised after
    at least one google example was produced is propagated.
    """
    if global_state.DEBUG_CORE:  # nocover
        print('Automatic style is trying google parsing')

    found_google = False
    try:
        for google_example in parse_google_docstr_examples(
                docstr, *args, **kwargs):
            found_google = True
            yield google_example
    except Exception:
        if found_google:
            raise

    if found_google:
        return

    # no google style tests were found, parse in freeform
    if global_state.DEBUG_CORE:  # nocover
        print('Automatic style is trying freeform parsing')
    for freeform_example in parse_freeform_docstr_examples(
            docstr, *args, **kwargs):
        yield freeform_example
def parse_docstr_examples(docstr, callname=None, modpath=None, lineno=1,
                          style='auto', fpath=None, parser_kw=None):
    """
    Generate doctest examples from a docstring using the requested style.

    Parse errors caused by the docstring itself (``MalformedDocstr`` and
    ``DoctestParseError``) are printed and reported as warnings; any other
    error is reported the same way and then re-raised.

    Args:
        docstr (str): a previously extracted docstring

        callname (str | None):
            the name of the callable (e.g. function, class, or method)
            that this docstring belongs to.

        modpath (str | PathLike | None):
            original module the docstring is from

        lineno (int):
            the line number (starting from 1) of the docstring.  i.e. if you
            were to go to this line number in the source file the starting
            quotes of the docstr would be on this line. Defaults to 1.

        style (str):
            expected doctest style, which can be "google", "freeform", or
            "auto". Defaults to 'auto'.

        fpath (str | PathLike | None):
            the file that the docstring is from (if the file was not a module,
            needed for backwards compatibility)

        parser_kw (dict | None):
            passed to the parser as keyword args

    Yields:
        xdoctest.doctest_example.DocTest : parsed example

    Raises:
        KeyError: if ``style`` is not a known style

    CommandLine:
        python -m xdoctest.core parse_docstr_examples

    Example:
        >>> from xdoctest.core import *
        >>> from xdoctest import utils
        >>> docstr = utils.codeblock(
        ...    '''
        ...    >>> 1 + 1  # xdoctest: +SKIP
        ...    2
        ...    >>> 2 + 2
        ...    4
        ...    ''')
        >>> examples = list(parse_docstr_examples(docstr, 'name', fpath='foo.txt', style='freeform'))
        >>> print(len(examples))
        1
        >>> examples = list(parse_docstr_examples(docstr, fpath='foo.txt'))
    """
    if global_state.DEBUG_CORE:  # nocover
        print('Parsing docstring examples for '
              'callname={} in modpath={}'.format(callname, modpath))

    # TODO: epdoc
    # TODO: numpy (parse_numpy_docstr_examples)
    style_table = (
        ('freeform', parse_freeform_docstr_examples),
        ('google', parse_google_docstr_examples),
        ('auto', parse_auto_docstr_examples),
    )
    # Compared with ``==`` (rather than hashed) so any ``style`` value is
    # handled exactly as an if/elif chain would handle it.
    parse_func = next(
        (func for name, func in style_table if style == name), None)
    if parse_func is None:
        raise KeyError('Unknown style={}. Valid styles are {}'.format(
            style, DOCTEST_STYLES))

    if global_state.DEBUG_CORE:  # nocover
        print('parser = {!r}'.format(parse_func))

    extra_kw = {} if parser_kw is None else parser_kw
    n_parsed = 0
    try:
        for example in parse_func(docstr, callname=callname, modpath=modpath,
                                  fpath=fpath, lineno=lineno, **extra_kw):
            n_parsed += 1
            yield example
    except Exception as ex:
        if global_state.DEBUG_CORE:  # nocover
            print('Caught an error when parsing')

        is_parse_error = isinstance(ex, exceptions.DoctestParseError)

        template = ('Cannot scrape callname={} in modpath={} line={}.\n'
                    'Caused by: {}\n')
        msg = template.format(callname, modpath, lineno, repr(ex))
        if is_parse_error:
            # TODO: Can we print a nicer syntax error here?
            msg += '{}\n'.format(ex.string)
            msg += 'Original Error: {}\n'.format(repr(ex.orig_ex))
            cause = ex.orig_ex
            if isinstance(cause, SyntaxError):
                # Show the offending text with a caret under the error column
                pointer = ''
                if cause.text:
                    pointer += utils.ensure_unicode(cause.text)
                if cause.offset is not None:
                    pointer += ' ' * (cause.offset - 1) + '^'
                if pointer:
                    msg += '\n' + pointer

        # Always warn when something bad is happening.
        # However, dont error if the docstr simply has bad syntax
        print('msg = {}'.format(msg))
        warnings.warn(msg)
        tolerated = is_parse_error or isinstance(
            ex, exceptions.MalformedDocstr)
        if not tolerated:
            raise

    if global_state.DEBUG_CORE:  # nocover
        print('Finished parsing {} examples'.format(n_parsed))
def _rectify_to_modpath(modpath_or_name):
    """
    Statically resolve a module name to a path.

    The input is first looked up as a module name. If that lookup gives
    nothing, the input itself is returned when it is an existing path.

    Raises:
        TypeError: if given a live module object
        ValueError: if the input is neither a findable module name nor an
            existing path
    """
    if isinstance(modpath_or_name, types.ModuleType):
        raise TypeError('Expected a static module but got a dynamic one')

    resolved = util_import.modname_to_modpath(modpath_or_name)
    if resolved is not None:
        return resolved

    # Not a known module name; accept it only if it is an existing path.
    if not exists(modpath_or_name):
        raise ValueError('Cannot find module={}'.format(modpath_or_name))
    return modpath_or_name
def package_calldefs(pkg_identifier, exclude=None, ignore_syntax_errors=True,
                     analysis='auto'):
    """
    Statically generates all callable definitions in a module or package

    Args:
        pkg_identifier (str | ModuleType):
            path to or name of the module to be tested (or the live module
            itself, which is not recommended)

        exclude (List[str] | None):
            glob-patterns of module names to exclude. ``None`` (the default)
            excludes nothing.

        ignore_syntax_errors (bool):
            if True (the default), a SyntaxError raised while parsing a
            module is reported as a warning and the module is skipped. If
            False, a SyntaxError is raised instead.

        analysis (str):
            if 'static', only static analysis is used to parse call
            definitions. If 'auto', uses dynamic analysis for compiled python
            extensions, but static analysis elsewhere, if 'dynamic', then
            dynamic analysis is used to parse all calldefs.
            Defaults to 'auto'.

    Yields:
        Tuple[Dict[str, xdoctest.static_analysis.CallDefNode], str | ModuleType] -
            * item[0]: the mapping of callnames-to-calldefs
            * item[1]: the path to the file containing the doctest
              (usually a module) or the module itself

    Raises:
        SyntaxError: if a module cannot be parsed and
            ``ignore_syntax_errors`` is False

    Example:
        >>> pkg_identifier = 'xdoctest.core'
        >>> testables = list(package_calldefs(pkg_identifier))
        >>> assert len(testables) == 1
        >>> calldefs, modpath = testables[0]
        >>> assert util_import.modpath_to_modname(modpath) == pkg_identifier
        >>> assert 'package_calldefs' in calldefs
    """
    # Avoid a shared mutable default argument.
    if exclude is None:
        exclude = []

    if global_state.DEBUG_CORE:  # nocover
        print('Find package calldefs: pkg_identifier = {!r}'.format(pkg_identifier))

    if isinstance(pkg_identifier, types.ModuleType):
        # Case where we are forced to use a live module
        identifiers = [pkg_identifier]
    else:
        pkgpath = _rectify_to_modpath(pkg_identifier)
        identifiers = list(static_analysis.package_modpaths(
            pkgpath, with_pkg=True, with_libs=True))

    for module_identifier in identifiers:
        if isinstance(module_identifier, str):
            # Path-based identifiers can be excluded by module name and are
            # skipped when the file is missing.
            modpath = module_identifier
            modname = util_import.modpath_to_modname(modpath)
            if any(fnmatch(modname, pat) for pat in exclude):
                continue
            if not exists(modpath):
                warnings.warn(
                    'Module {} does not exist. '
                    'Is it an old pyc file?'.format(modname))
                continue
        try:
            calldefs = parse_calldefs(module_identifier, analysis=analysis)
        except SyntaxError as ex:
            # Handle error due to the actual code containing errors
            msg = 'Cannot parse module={}.\nCaused by: {}'
            msg = msg.format(module_identifier, ex)
            if ignore_syntax_errors:
                warnings.warn(msg)  # real code or docstr contained errors
            else:
                raise SyntaxError(msg) from ex
        else:
            # parse_calldefs gives None when nothing could be parsed
            if calldefs is not None:
                yield calldefs, module_identifier
def parse_calldefs(module_identifier, analysis='auto'):
    """
    Parse calldefs from a single module using either static or dynamic
    analysis.

    Args:
        module_identifier (str | ModuleType):
            path to or name of the module to be tested (or the live module
            itself, which is not recommended)

        analysis (str, default='auto'):
            if 'static', only static analysis is used to parse call
            definitions. If 'auto', uses dynamic analysis for compiled python
            extensions, but static analysis elsewhere, if 'dynamic', then
            dynamic analysis is used to parse all calldefs.

    Returns:
        Dict[str, xdoctest.static_analysis.CallDefNode] | None:
            the mapping of callnames-to-calldefs within the module, or None
            when dynamic analysis failed with an ImportError or RuntimeError
            (a warning is emitted in that case).

    Raises:
        KeyError: if ``analysis`` is not 'static', 'dynamic' or 'auto'
        Exception: if ``analysis='static'`` but the module can only be
            analyzed dynamically
    """
    # backwards compatibility hacks: each deprecated CLI flag overrides the
    # ``analysis`` argument. They are checked in this order, so the later flag
    # wins when both are present.
    deprecated_flags = (
        ('--allow-xdoc-dynamic', 'auto'),
        ('--xdoc-force-dynamic', 'dynamic'),
    )
    for flag, replacement in deprecated_flags:
        if flag in sys.argv:
            from xdoctest.utils import util_deprecation
            util_deprecation.schedule_deprecation(
                modname='xdoctest', name=flag, type='CLI flag',
                migration='use --analysis={} instead'.format(replacement),
                deprecate='1.0.0', error='1.1.0', remove='1.2.0'
            )
            analysis = replacement

    if isinstance(module_identifier, types.ModuleType):
        # identifier is a live module
        need_dynamic = True
    else:
        # identifier is a path to a module
        modpath = module_identifier
        # Certain files (notebooks and c-extensions) require dynamic analysis
        need_dynamic = (
            modpath.endswith(static_analysis._platform_pylib_exts()) or
            modpath.endswith('.ipynb'))

    if analysis == 'static':
        if need_dynamic:
            # Some modules can only be parsed dynamically
            raise Exception((
                'Static analysis required, but {} requires '
                'dynamic analysis').format(module_identifier))
        do_dynamic = False
    elif analysis == 'dynamic':
        do_dynamic = True
    elif analysis == 'auto':
        do_dynamic = need_dynamic
    else:
        raise KeyError(analysis)

    if global_state.DEBUG_CORE:  # nocover
        print('About to parse calldefs with do_dynamic={}'.format(do_dynamic))

    calldefs = None
    if do_dynamic:
        try:
            calldefs = dynamic_analysis.parse_dynamic_calldefs(module_identifier)
        except Exception as ex:
            msg = 'Cannot dynamically parse module={}.\nCaused by: {!r} {}'
            msg = msg.format(module_identifier, type(ex), ex)
            warnings.warn(msg)
            # Some modules are just c modules: import / runtime failures are
            # tolerated (calldefs stays None); anything else propagates.
            if not isinstance(ex, (ImportError, RuntimeError)):
                raise
    else:
        calldefs = static_analysis.parse_static_calldefs(fpath=module_identifier)

    if global_state.DEBUG_CORE:  # nocover
        # calldefs is None when dynamic parsing failed; do not call len on it
        n_found = 0 if calldefs is None else len(calldefs)
        print('Found {} calldefs'.format(n_found))
    return calldefs
def parse_doctestables(module_identifier, exclude=None, style='auto',
                       ignore_syntax_errors=True, parser_kw=None,
                       analysis='auto'):
    """
    Parses all doctests within top-level callables of a module and generates
    example objects.  The style influences which tests are found.

    Args:
        module_identifier (str | PathLike | ModuleType):
            path or name of a module or a module itself (we prefer a path)

        exclude (List[str] | None):
            glob-patterns of file names to exclude. ``None`` (the default)
            excludes nothing.

        style (str):
            expected doctest style (e.g. google, freeform, auto)

        ignore_syntax_errors (bool, default=True):
            if False raise an error when syntax errors

        parser_kw (dict | None):
            extra args passed to the parser. ``None`` (the default) passes
            no extra args.

        analysis (str, default='auto'):
            if 'static', only static analysis is used to parse call
            definitions. If 'auto', uses dynamic analysis for compiled python
            extensions, but static analysis elsewhere, if 'dynamic', then
            dynamic analysis is used to parse all calldefs.

    Yields:
        xdoctest.doctest_example.DocTest : parsed doctest example objects

    Raises:
        KeyError: if ``style`` is not one of ``DOCTEST_STYLES``

    CommandLine:
        python -m xdoctest.core parse_doctestables

    Example:
        >>> module_identifier = 'xdoctest.core'
        >>> testables = list(parse_doctestables(module_identifier))
        >>> this_example = None
        >>> for example in testables:
        >>>     # print(example)
        >>>     if example.callname == 'parse_doctestables':
        >>>         this_example = example
        >>> assert this_example is not None
        >>> assert this_example.callname == 'parse_doctestables'

    Example:
        >>> from xdoctest import utils
        >>> docstr = utils.codeblock(
        ...    '''
        ...    >>> 1 + 1  # xdoctest: +SKIP
        ...    2
        ...    >>> 2 + 2
        ...    4
        ...    ''')
        >>> temp = utils.TempDoctest(docstr, 'test_modfile')
        >>> modpath = temp.modpath
        >>> examples = list(parse_doctestables(modpath, style='freeform'))
        >>> print(len(examples))
        1
    """
    if style not in DOCTEST_STYLES:
        raise KeyError('Unknown style={}. Valid styles are {}'.format(
            style, DOCTEST_STYLES))

    # Avoid shared mutable default arguments.
    if exclude is None:
        exclude = []
    if parser_kw is None:
        parser_kw = {}

    # Statically parse modules and their doctestable callables in a package
    for calldefs, modpath in package_calldefs(module_identifier, exclude,
                                              ignore_syntax_errors,
                                              analysis=analysis):
        for callname, calldef in calldefs.items():
            docstr = calldef.docstr
            if docstr is None:
                # Nothing to parse for callables without a docstring
                continue
            example_gen = parse_docstr_examples(
                docstr, callname=callname, modpath=modpath,
                lineno=calldef.doclineno, style=style, parser_kw=parser_kw)
            for example in example_gen:
                if global_state.DEBUG_CORE:  # nocover
                    print(' * Yield example={}'.format(example))
                yield example
if __name__ == '__main__':
    """
    CommandLine:
        python -m xdoctest.core all
    """
    # Script entry point: hands control to xdoctest's module runner
    # (presumably to run the doctests defined in this file -- confirm).
    import xdoctest as xdoc
    xdoc.doctest_module()