dotfiles/vim/plugins/ultisnips/pythonx/UltiSnips/snippet/parsing/_lexer.py

#!/usr/bin/env python
# encoding: utf-8

"""Not really a lexer in the classical sense, but code to convert snippet
definitions into logical units called Tokens."""

import string
import re

from UltiSnips.compatibility import as_unicode
from UltiSnips.position import Position
from UltiSnips.text import unescape


class _TextIterator(object):

    """Helper class to make iterating over text easier."""

    def __init__(self, text, offset):
        self._text = as_unicode(text)
        self._line = offset.line
        self._col = offset.col

        self._idx = 0

    def __iter__(self):
        """Iterator interface."""
        return self

    def __next__(self):
        """Returns the next character."""
        if self._idx >= len(self._text):
            raise StopIteration

        rv = self._text[self._idx]
        if self._text[self._idx] in ('\n', '\r\n'):
            self._line += 1
            self._col = 0
        else:
            self._col += 1
        self._idx += 1
        return rv
    next = __next__  # for python2

    def peek(self, count=1):
        """Returns the next 'count' characters without advancing the stream."""
        if count > 1:  # This might return '' if nothing is found
            return self._text[self._idx:self._idx + count]
        try:
            return self._text[self._idx]
        except IndexError:
            return None

    @property
    def pos(self):
        """Current position in the text."""
        return Position(self._line, self._col)


def _parse_number(stream):
    """Expects the stream to contain a number next, returns the number without
    consuming any more bytes."""
    rv = ''
    while stream.peek() and stream.peek() in string.digits:
        rv += next(stream)

    return int(rv)


def _parse_till_closing_brace(stream):
    """
    Returns all chars till a non-escaped } is found. Other
    non escaped { are taken into account and skipped over.

    Will also consume the closing }, but not return it
    """
    rv = ''
    in_braces = 1
    while True:
        if EscapeCharToken.starts_here(stream, '{}'):
            rv += next(stream) + next(stream)
        else:
            char = next(stream)
            if char == '{':
                in_braces += 1
            elif char == '}':
                in_braces -= 1
            if in_braces == 0:
                break
            rv += char
    return rv


def _parse_till_unescaped_char(stream, chars):
    """
    Returns all chars till a non-escaped char is found.

    Will also consume the closing char, but and return it as second
    return value
    """
    rv = ''
    while True:
        escaped = False
        for char in chars:
            if EscapeCharToken.starts_here(stream, char):
                rv += next(stream) + next(stream)
                escaped = True
        if not escaped:
            char = next(stream)
            if char in chars:
                break
            rv += char
    return rv, char


class Token(object):

    """Represents a Token as parsed from a snippet definition."""

    def __init__(self, gen, indent):
        self.initial_text = as_unicode('')
        self.start = gen.pos
        self._parse(gen, indent)
        self.end = gen.pos

    def _parse(self, stream, indent):
        """Parses the token from 'stream' with the current 'indent'."""
        pass  # Does nothing


class TabStopToken(Token):

    """${1:blub}"""
    CHECK = re.compile(r'^\${\d+[:}]')

    @classmethod
    def starts_here(cls, stream):
        """Returns true if this token starts at the current position in
        'stream'."""
        return cls.CHECK.match(stream.peek(10)) is not None

    def _parse(self, stream, indent):
        next(stream)  # $
        next(stream)  # {

        self.number = _parse_number(stream)

        if stream.peek() == ':':
            next(stream)
        self.initial_text = _parse_till_closing_brace(stream)

    def __repr__(self):
        return 'TabStopToken(%r,%r,%r,%r)' % (
            self.start, self.end, self.number, self.initial_text
        )


class VisualToken(Token):

    """${VISUAL}"""
    CHECK = re.compile(r"^\${VISUAL[:}/]")

    @classmethod
    def starts_here(cls, stream):
        """Returns true if this token starts at the current position in
        'stream'."""
        return cls.CHECK.match(stream.peek(10)) is not None

    def _parse(self, stream, indent):
        for _ in range(8):  # ${VISUAL
            next(stream)

        if stream.peek() == ':':
            next(stream)
        self.alternative_text, char = _parse_till_unescaped_char(stream, '/}')
        self.alternative_text = unescape(self.alternative_text)

        if char == '/':  # Transformation going on
            try:
                self.search = _parse_till_unescaped_char(stream, '/')[0]
                self.replace = _parse_till_unescaped_char(stream, '/')[0]
                self.options = _parse_till_closing_brace(stream)
            except StopIteration:
                raise RuntimeError(
                    "Invalid ${VISUAL} transformation! Forgot to escape a '/'?")
        else:
            self.search = None
            self.replace = None
            self.options = None

    def __repr__(self):
        return 'VisualToken(%r,%r)' % (
            self.start, self.end
        )


class TransformationToken(Token):

    """${1/match/replace/options}"""

    CHECK = re.compile(r'^\${\d+\/')

    @classmethod
    def starts_here(cls, stream):
        """Returns true if this token starts at the current position in
        'stream'."""
        return cls.CHECK.match(stream.peek(10)) is not None

    def _parse(self, stream, indent):
        next(stream)  # $
        next(stream)  # {

        self.number = _parse_number(stream)

        next(stream)  # /

        self.search = _parse_till_unescaped_char(stream, '/')[0]
        self.replace = _parse_till_unescaped_char(stream, '/')[0]
        self.options = _parse_till_closing_brace(stream)

    def __repr__(self):
        return 'TransformationToken(%r,%r,%r,%r,%r)' % (
            self.start, self.end, self.number, self.search, self.replace
        )


class MirrorToken(Token):

    """$1."""
    CHECK = re.compile(r'^\$\d+')

    @classmethod
    def starts_here(cls, stream):
        """Returns true if this token starts at the current position in
        'stream'."""
        return cls.CHECK.match(stream.peek(10)) is not None

    def _parse(self, stream, indent):
        next(stream)  # $
        self.number = _parse_number(stream)

    def __repr__(self):
        return 'MirrorToken(%r,%r,%r)' % (
            self.start, self.end, self.number
        )


class EscapeCharToken(Token):

    """\\n."""
    @classmethod
    def starts_here(cls, stream, chars=r'{}\$`'):
        """Returns true if this token starts at the current position in
        'stream'."""
        cs = stream.peek(2)
        if len(cs) == 2 and cs[0] == '\\' and cs[1] in chars:
            return True

    def _parse(self, stream, indent):
        next(stream)  # \
        self.initial_text = next(stream)

    def __repr__(self):
        return 'EscapeCharToken(%r,%r,%r)' % (
            self.start, self.end, self.initial_text
        )


class ShellCodeToken(Token):

    """`echo "hi"`"""
    @classmethod
    def starts_here(cls, stream):
        """Returns true if this token starts at the current position in
        'stream'."""
        return stream.peek(1) == '`'

    def _parse(self, stream, indent):
        next(stream)  # `
        self.code = _parse_till_unescaped_char(stream, '`')[0]

    def __repr__(self):
        return 'ShellCodeToken(%r,%r,%r)' % (
            self.start, self.end, self.code
        )


class PythonCodeToken(Token):

    """`!p snip.rv = "Hi"`"""
    CHECK = re.compile(r'^`!p\s')

    @classmethod
    def starts_here(cls, stream):
        """Returns true if this token starts at the current position in
        'stream'."""
        return cls.CHECK.match(stream.peek(4)) is not None

    def _parse(self, stream, indent):
        for _ in range(3):
            next(stream)  # `!p
        if stream.peek() in '\t ':
            next(stream)

        code = _parse_till_unescaped_char(stream, '`')[0]

        # Strip the indent if any
        if len(indent):
            lines = code.splitlines()
            self.code = lines[0] + '\n'
            self.code += '\n'.join([l[len(indent):]
                                    for l in lines[1:]])
        else:
            self.code = code
        self.indent = indent

    def __repr__(self):
        return 'PythonCodeToken(%r,%r,%r)' % (
            self.start, self.end, self.code
        )


class VimLCodeToken(Token):

    """`!v g:hi`"""
    CHECK = re.compile(r'^`!v\s')

    @classmethod
    def starts_here(cls, stream):
        """Returns true if this token starts at the current position in
        'stream'."""
        return cls.CHECK.match(stream.peek(4)) is not None

    def _parse(self, stream, indent):
        for _ in range(4):
            next(stream)  # `!v
        self.code = _parse_till_unescaped_char(stream, '`')[0]

    def __repr__(self):
        return 'VimLCodeToken(%r,%r,%r)' % (
            self.start, self.end, self.code
        )


class EndOfTextToken(Token):

    """Appears at the end of the text."""

    def __repr__(self):
        return 'EndOfText(%r)' % self.end


def tokenize(text, indent, offset, allowed_tokens):
    """Returns an iterator of tokens of 'text'['offset':] which is assumed to
    have 'indent' as the whitespace of the begging of the lines. Only
    'allowed_tokens' are considered to be valid tokens."""
    stream = _TextIterator(text, offset)
    try:
        while True:
            done_something = False
            for token in allowed_tokens:
                if token.starts_here(stream):
                    yield token(stream, indent)
                    done_something = True
                    break
            if not done_something:
                next(stream)
    except StopIteration:
        yield EndOfTextToken(stream, indent)