dotfiles/vim/plugins/ultisnips/pythonx/UltiSnips/snippet/parsing/_lexer.py

#!/usr/bin/env python
# encoding: utf-8

"""Not really a lexer in the classical sense, but code to convert snippet
definitions into logical units called Tokens."""

import string
import re

from UltiSnips.compatibility import as_unicode
from UltiSnips.position import Position
from UltiSnips.text import unescape


class _TextIterator(object):

    """Helper class to make iterating over text easier."""

    def __init__(self, text, offset):
        self._text = as_unicode(text)
        self._line = offset.line
        self._col = offset.col

        self._idx = 0

    def __iter__(self):
        """Iterator interface."""
        return self

    def __next__(self):
        """Returns the next character."""
        if self._idx >= len(self._text):
            raise StopIteration

        rv = self._text[self._idx]
        if self._text[self._idx] in ('\n', '\r\n'):
            self._line += 1
            self._col = 0
        else:
            self._col += 1
        self._idx += 1
        return rv
    next = __next__  # for python2

    def peek(self, count=1):
        """Returns the next 'count' characters without advancing the stream."""
        if count > 1:  # This might return '' if nothing is found
            return self._text[self._idx:self._idx + count]
        try:
            return self._text[self._idx]
        except IndexError:
            return None

    @property
    def pos(self):
        """Current position in the text."""
        return Position(self._line, self._col)


def _parse_number(stream):
    """Expects the stream to contain a number next, returns the number without
    consuming any more bytes."""
    rv = ''
    while stream.peek() and stream.peek() in string.digits:
        rv += next(stream)

    return int(rv)


def _parse_till_closing_brace(stream):
    """
    Collects characters up to the } that closes the brace already open when
    this function is called. Unescaped { ... } pairs met on the way are
    balanced and kept in the result; escaped braces are kept together with
    their backslash and do not count.

    The closing } is consumed but is not part of the returned text.
    """
    collected = []
    depth = 1  # The caller has already consumed one opening brace
    while True:
        if EscapeCharToken.starts_here(stream, '{}'):
            # Keep the backslash and the escaped brace untouched
            collected.append(next(stream))
            collected.append(next(stream))
            continue

        current = next(stream)
        if current == '{':
            depth += 1
        elif current == '}':
            depth -= 1
        if not depth:
            break
        collected.append(current)
    return ''.join(collected)


def _parse_till_unescaped_char(stream, chars):
    """
    Collects characters until one of 'chars' is found unescaped.

    The terminating character is consumed as well and is returned as the
    second element of the result; escaped occurrences are kept in the text
    together with their backslash.
    """
    pieces = []
    while True:
        saw_escape = False
        for candidate in chars:
            if EscapeCharToken.starts_here(stream, candidate):
                # Copy the backslash and the escaped character verbatim
                pieces.append(next(stream))
                pieces.append(next(stream))
                saw_escape = True
        if saw_escape:
            continue

        char = next(stream)
        if char in chars:
            return ''.join(pieces), char
        pieces.append(char)


class Token(object):

    """Base class of all tokens found in a snippet definition.

    Construction records the stream position before and after parsing in
    'start' and 'end'; subclasses do the actual work in '_parse'.
    """

    def __init__(self, gen, indent):
        """Parses one token from 'gen', remembering where it starts and
        where it ends."""
        self.initial_text = as_unicode('')
        self.start = gen.pos
        self._parse(gen, indent)
        self.end = gen.pos

    def _parse(self, stream, indent):
        """Hook for subclasses: consume the token from 'stream' given the
        current 'indent'. The base implementation consumes nothing."""


class TabStopToken(Token):

    """A numbered tab stop with optional default text: ${1:blub} or ${1}"""
    CHECK = re.compile(r'^\${\d+[:}]')

    @classmethod
    def starts_here(cls, stream):
        """Tells whether the upcoming characters of 'stream' open a tab
        stop. Only the next 10 characters are inspected."""
        lookahead = stream.peek(10)
        return cls.CHECK.match(lookahead) is not None

    def _parse(self, stream, indent):
        """Reads the tab stop number into 'number' and everything up to the
        matching closing brace into 'initial_text'."""
        # Skip the two characters of the '${' opener
        next(stream)
        next(stream)

        self.number = _parse_number(stream)

        # The ':' separating number and default text is not part of the text
        if stream.peek() == ':':
            next(stream)
        self.initial_text = _parse_till_closing_brace(stream)

    def __repr__(self):
        fields = (self.start, self.end, self.number, self.initial_text)
        return 'TabStopToken(%r,%r,%r,%r)' % fields


class VisualToken(Token):

    """The visual placeholder: ${VISUAL}, ${VISUAL:alternative text} or
    ${VISUAL:alternative/search/replace/options}"""
    CHECK = re.compile(r"^\${VISUAL[:}/]")

    @classmethod
    def starts_here(cls, stream):
        """Tells whether the upcoming characters of 'stream' open a
        ${VISUAL} placeholder. Only the next 10 characters are inspected."""
        lookahead = stream.peek(10)
        return cls.CHECK.match(lookahead) is not None

    def _parse(self, stream, indent):
        """Reads the alternative text and, when a '/' follows it, the
        search, replace and options parts of a transformation. Without a
        transformation these three attributes are set to None."""
        for _ in range(8):  # ${VISUAL
            next(stream)

        if stream.peek() == ':':
            next(stream)
        raw_text, terminator = _parse_till_unescaped_char(stream, '/}')
        self.alternative_text = unescape(raw_text)

        if terminator != '/':  # Closed by '}': plain placeholder
            self.search = self.replace = self.options = None
            return

        # Transformation going on
        try:
            self.search = _parse_till_unescaped_char(stream, '/')[0]
            self.replace = _parse_till_unescaped_char(stream, '/')[0]
            self.options = _parse_till_closing_brace(stream)
        except StopIteration:
            # The text ended before the transformation was complete
            raise RuntimeError(
                "Invalid ${VISUAL} transformation! Forgot to escape a '/'?")

    def __repr__(self):
        fields = (self.start, self.end)
        return 'VisualToken(%r,%r)' % fields


class TransformationToken(Token):

    """A transformation of a tab stop: ${1/match/replace/options}"""

    CHECK = re.compile(r'^\${\d+\/')

    @classmethod
    def starts_here(cls, stream):
        """Tells whether the upcoming characters of 'stream' open a
        transformation. Only the next 10 characters are inspected."""
        lookahead = stream.peek(10)
        return cls.CHECK.match(lookahead) is not None

    def _parse(self, stream, indent):
        """Reads the tab stop number followed by the '/'-separated search
        and replace parts and the options, which run up to the closing
        brace."""
        # Skip the two characters of the '${' opener
        next(stream)
        next(stream)

        self.number = _parse_number(stream)

        next(stream)  # The '/' between the number and the search part

        search, _ = _parse_till_unescaped_char(stream, '/')
        self.search = search
        replace, _ = _parse_till_unescaped_char(stream, '/')
        self.replace = replace
        self.options = _parse_till_closing_brace(stream)

    def __repr__(self):
        fields = (
            self.start, self.end, self.number, self.search, self.replace
        )
        return 'TransformationToken(%r,%r,%r,%r,%r)' % fields


class MirrorToken(Token):

    """A mirror of a tab stop, written as '$' followed by its number: $1"""
    CHECK = re.compile(r'^\$\d+')

    @classmethod
    def starts_here(cls, stream):
        """Tells whether the upcoming characters of 'stream' form a mirror.
        Only the next 10 characters are inspected."""
        lookahead = stream.peek(10)
        return cls.CHECK.match(lookahead) is not None

    def _parse(self, stream, indent):
        """Skips the '$' and stores the number following it in 'number'."""
        next(stream)  # $
        self.number = _parse_number(stream)

    def __repr__(self):
        fields = (self.start, self.end, self.number)
        return 'MirrorToken(%r,%r,%r)' % fields


class EscapeCharToken(Token):

    """A backslash followed by one of the escapable characters, e.g. \\$"""
    @classmethod
    def starts_here(cls, stream, chars=r'{}\$`'):
        """Returns True if this token starts at the current position in
        'stream', i.e. the next two characters are a backslash followed by
        one of 'chars', and False otherwise.

        A real bool is returned in every case, like the 'starts_here' of
        the other token classes does.
        """
        cs = stream.peek(2)
        # Fewer than two characters are left near the end of the text
        return len(cs) == 2 and cs[0] == '\\' and cs[1] in chars

    def _parse(self, stream, indent):
        """Drops the backslash and keeps the escaped character as
        'initial_text'."""
        next(stream)  # \
        self.initial_text = next(stream)

    def __repr__(self):
        return 'EscapeCharToken(%r,%r,%r)' % (
            self.start, self.end, self.initial_text
        )


class ShellCodeToken(Token):

    """Code enclosed in plain backticks: `echo "hi"`"""
    @classmethod
    def starts_here(cls, stream):
        """Tells whether the next character of 'stream' is a backtick."""
        upcoming = stream.peek(1)
        return upcoming == '`'

    def _parse(self, stream, indent):
        """Skips the opening backtick and stores everything up to the next
        unescaped backtick in 'code'."""
        next(stream)  # `
        code, _ = _parse_till_unescaped_char(stream, '`')
        self.code = code

    def __repr__(self):
        fields = (self.start, self.end, self.code)
        return 'ShellCodeToken(%r,%r,%r)' % fields


class PythonCodeToken(Token):

    """Python code in backticks, introduced by !p: `!p snip.rv = "Hi"`"""
    CHECK = re.compile(r'^`!p\s')

    @classmethod
    def starts_here(cls, stream):
        """Returns true if this token starts at the current position in
        'stream'. The opening backtick and '!p' must be followed by a
        whitespace character."""
        return cls.CHECK.match(stream.peek(4)) is not None

    def _parse(self, stream, indent):
        """Reads the code up to the next unescaped backtick into 'code' and
        remembers 'indent' in 'indent'.

        With a non-empty 'indent', the first len(indent) characters are cut
        off every line but the first, and the first line is terminated by a
        newline. Empty code is stored unchanged.
        """
        for _ in range(3):
            next(stream)  # `!p
        # A single tab or space after '!p' is a separator, not code
        if stream.peek() in '\t ':
            next(stream)

        code = _parse_till_unescaped_char(stream, '`')[0]

        # Strip the indent if any. Empty code has no lines at all, so there
        # is no first line to index and nothing to strip.
        lines = code.splitlines() if len(indent) else []
        if lines:
            self.code = lines[0] + '\n'
            self.code += '\n'.join([l[len(indent):]
                                    for l in lines[1:]])
        else:
            self.code = code
        self.indent = indent

    def __repr__(self):
        return 'PythonCodeToken(%r,%r,%r)' % (
            self.start, self.end, self.code
        )


class VimLCodeToken(Token):

    """Vim script in backticks, introduced by !v: `!v g:hi`"""
    CHECK = re.compile(r'^`!v\s')

    @classmethod
    def starts_here(cls, stream):
        """Tells whether the upcoming characters of 'stream' are a backtick
        and '!v' followed by a whitespace character."""
        lookahead = stream.peek(4)
        return cls.CHECK.match(lookahead) is not None

    def _parse(self, stream, indent):
        """Skips the four-character opener and stores everything up to the
        next unescaped backtick in 'code'."""
        # Backtick, '!', 'v' and the whitespace character required by CHECK
        for _ in range(4):
            next(stream)
        code, _ = _parse_till_unescaped_char(stream, '`')
        self.code = code

    def __repr__(self):
        fields = (self.start, self.end, self.code)
        return 'VimLCodeToken(%r,%r,%r)' % fields


class EndOfTextToken(Token):

    """Marks the end of the text. It defines no parsing of its own, so the
    inherited no-op '_parse' leaves the stream untouched."""

    def __repr__(self):
        end = self.end
        return 'EndOfText(%r)' % end


def tokenize(text, indent, offset, allowed_tokens):
    """Yields the tokens found in 'text', whose lines are assumed to have
    'indent' as the whitespace at their beginning. 'offset' is handed to the
    text iterator as the position of the first character. Only the classes
    in 'allowed_tokens' are tried, in the given order, and characters at
    which none of them starts are skipped. The last token yielded is always
    an EndOfTextToken."""
    stream = _TextIterator(text, offset)
    try:
        while True:
            for token_cls in allowed_tokens:
                if token_cls.starts_here(stream):
                    yield token_cls(stream, indent)
                    break
            else:
                # No token starts here: drop one character and retry
                next(stream)
    except StopIteration:
        # Raised by the stream once the text is exhausted
        yield EndOfTextToken(stream, indent)
Fixed vim and zsh 2018-04-05 13:06:54 +02:00			`#!/usr/bin/env python`
			`# encoding: utf-8`

			`"""Not really a lexer in the classical sense, but code to convert snippet`
			`definitions into logical units called Tokens."""`

			`import string`
			`import re`

			`from UltiSnips.compatibility import as_unicode`
			`from UltiSnips.position import Position`
			`from UltiSnips.text import unescape`


			`class _TextIterator(object):`

			`"""Helper class to make iterating over text easier."""`

			`def __init__(self, text, offset):`
			`self._text = as_unicode(text)`
			`self._line = offset.line`
			`self._col = offset.col`

			`self._idx = 0`

			`def __iter__(self):`
			`"""Iterator interface."""`
			`return self`

			`def __next__(self):`
			`"""Returns the next character."""`
			`if self._idx >= len(self._text):`
			`raise StopIteration`

			`rv = self._text[self._idx]`
			`if self._text[self._idx] in ('\n', '\r\n'):`
			`self._line += 1`
			`self._col = 0`
			`else:`
			`self._col += 1`
			`self._idx += 1`
			`return rv`
			`next = __next__ # for python2`

			`def peek(self, count=1):`
			`"""Returns the next 'count' characters without advancing the stream."""`
			`if count > 1: # This might return '' if nothing is found`
			`return self._text[self._idx:self._idx + count]`
			`try:`
			`return self._text[self._idx]`
			`except IndexError:`
			`return None`

			`@property`
			`def pos(self):`
			`"""Current position in the text."""`
			`return Position(self._line, self._col)`


			`def _parse_number(stream):`
			`"""Expects the stream to contain a number next, returns the number without`
			`consuming any more bytes."""`
			`rv = ''`
			`while stream.peek() and stream.peek() in string.digits:`
			`rv += next(stream)`

			`return int(rv)`


			`def _parse_till_closing_brace(stream):`
			`"""`
			`Returns all chars till a non-escaped } is found. Other`
			`non escaped { are taken into account and skipped over.`

			`Will also consume the closing }, but not return it`
			`"""`
			`rv = ''`
			`in_braces = 1`
			`while True:`
			`if EscapeCharToken.starts_here(stream, '{}'):`
			`rv += next(stream) + next(stream)`
			`else:`
			`char = next(stream)`
			`if char == '{':`
			`in_braces += 1`
			`elif char == '}':`
			`in_braces -= 1`
			`if in_braces == 0:`
			`break`
			`rv += char`
			`return rv`


			`def _parse_till_unescaped_char(stream, chars):`
			`"""`
			`Returns all chars till a non-escaped char is found.`

			`Will also consume the closing char, but and return it as second`
			`return value`
			`"""`
			`rv = ''`
			`while True:`
			`escaped = False`
			`for char in chars:`
			`if EscapeCharToken.starts_here(stream, char):`
			`rv += next(stream) + next(stream)`
			`escaped = True`
			`if not escaped:`
			`char = next(stream)`
			`if char in chars:`
			`break`
			`rv += char`
			`return rv, char`


			`class Token(object):`

			`"""Represents a Token as parsed from a snippet definition."""`

			`def __init__(self, gen, indent):`
			`self.initial_text = as_unicode('')`
			`self.start = gen.pos`
			`self._parse(gen, indent)`
			`self.end = gen.pos`

			`def _parse(self, stream, indent):`
			`"""Parses the token from 'stream' with the current 'indent'."""`
			`pass # Does nothing`


			`class TabStopToken(Token):`

			`"""${1:blub}"""`
			`CHECK = re.compile(r'^\${\d+[:}]')`

			`@classmethod`
			`def starts_here(cls, stream):`
			`"""Returns true if this token starts at the current position in`
			`'stream'."""`
			`return cls.CHECK.match(stream.peek(10)) is not None`

			`def _parse(self, stream, indent):`
			`next(stream) # $`
			`next(stream) # {`

			`self.number = _parse_number(stream)`

			`if stream.peek() == ':':`
			`next(stream)`
			`self.initial_text = _parse_till_closing_brace(stream)`

			`def __repr__(self):`
			`return 'TabStopToken(%r,%r,%r,%r)' % (`
			`self.start, self.end, self.number, self.initial_text`
			`)`


			`class VisualToken(Token):`

			`"""${VISUAL}"""`
			`CHECK = re.compile(r"^\${VISUAL[:}/]")`

			`@classmethod`
			`def starts_here(cls, stream):`
			`"""Returns true if this token starts at the current position in`
			`'stream'."""`
			`return cls.CHECK.match(stream.peek(10)) is not None`

			`def _parse(self, stream, indent):`
			`for _ in range(8): # ${VISUAL`
			`next(stream)`

			`if stream.peek() == ':':`
			`next(stream)`
			`self.alternative_text, char = _parse_till_unescaped_char(stream, '/}')`
			`self.alternative_text = unescape(self.alternative_text)`

			`if char == '/': # Transformation going on`
			`try:`
			`self.search = _parse_till_unescaped_char(stream, '/')[0]`
			`self.replace = _parse_till_unescaped_char(stream, '/')[0]`
			`self.options = _parse_till_closing_brace(stream)`
			`except StopIteration:`
			`raise RuntimeError(`
			`"Invalid ${VISUAL} transformation! Forgot to escape a '/'?")`
			`else:`
			`self.search = None`
			`self.replace = None`
			`self.options = None`

			`def __repr__(self):`
			`return 'VisualToken(%r,%r)' % (`
			`self.start, self.end`
			`)`


			`class TransformationToken(Token):`

			`"""${1/match/replace/options}"""`

			`CHECK = re.compile(r'^\${\d+\/')`

			`@classmethod`
			`def starts_here(cls, stream):`
			`"""Returns true if this token starts at the current position in`
			`'stream'."""`
			`return cls.CHECK.match(stream.peek(10)) is not None`

			`def _parse(self, stream, indent):`
			`next(stream) # $`
			`next(stream) # {`

			`self.number = _parse_number(stream)`

			`next(stream) # /`

			`self.search = _parse_till_unescaped_char(stream, '/')[0]`
			`self.replace = _parse_till_unescaped_char(stream, '/')[0]`
			`self.options = _parse_till_closing_brace(stream)`

			`def __repr__(self):`
			`return 'TransformationToken(%r,%r,%r,%r,%r)' % (`
			`self.start, self.end, self.number, self.search, self.replace`
			`)`


			`class MirrorToken(Token):`

			`"""$1."""`
			`CHECK = re.compile(r'^\$\d+')`

			`@classmethod`
			`def starts_here(cls, stream):`
			`"""Returns true if this token starts at the current position in`
			`'stream'."""`
			`return cls.CHECK.match(stream.peek(10)) is not None`

			`def _parse(self, stream, indent):`
			`next(stream) # $`
			`self.number = _parse_number(stream)`

			`def __repr__(self):`
			`return 'MirrorToken(%r,%r,%r)' % (`
			`self.start, self.end, self.number`
			`)`


			`class EscapeCharToken(Token):`

			`"""\\n."""`
			`@classmethod`
			def starts_here(cls, stream, chars=r'{}\$`'):
			`"""Returns true if this token starts at the current position in`
			`'stream'."""`
			`cs = stream.peek(2)`
			`if len(cs) == 2 and cs[0] == '\\' and cs[1] in chars:`
			`return True`

			`def _parse(self, stream, indent):`
			`next(stream) # \`
			`self.initial_text = next(stream)`

			`def __repr__(self):`
			`return 'EscapeCharToken(%r,%r,%r)' % (`
			`self.start, self.end, self.initial_text`
			`)`


			`class ShellCodeToken(Token):`

			"""`echo "hi"`"""
			`@classmethod`
			`def starts_here(cls, stream):`
			`"""Returns true if this token starts at the current position in`
			`'stream'."""`
			return stream.peek(1) == '`'

			`def _parse(self, stream, indent):`
			next(stream) # `
			self.code = _parse_till_unescaped_char(stream, '`')[0]

			`def __repr__(self):`
			`return 'ShellCodeToken(%r,%r,%r)' % (`
			`self.start, self.end, self.code`
			`)`


			`class PythonCodeToken(Token):`

			"""`!p snip.rv = "Hi"`"""
			CHECK = re.compile(r'^`!p\s')

			`@classmethod`
			`def starts_here(cls, stream):`
			`"""Returns true if this token starts at the current position in`
			`'stream'."""`
			`return cls.CHECK.match(stream.peek(4)) is not None`

			`def _parse(self, stream, indent):`
			`for _ in range(3):`
			next(stream) # `!p
			`if stream.peek() in '\t ':`
			`next(stream)`

			code = _parse_till_unescaped_char(stream, '`')[0]

			`# Strip the indent if any`
			`if len(indent):`
			`lines = code.splitlines()`
			`self.code = lines[0] + '\n'`
			`self.code += '\n'.join([l[len(indent):]`
			`for l in lines[1:]])`
			`else:`
			`self.code = code`
			`self.indent = indent`

			`def __repr__(self):`
			`return 'PythonCodeToken(%r,%r,%r)' % (`
			`self.start, self.end, self.code`
			`)`


			`class VimLCodeToken(Token):`

			"""`!v g:hi`"""
			CHECK = re.compile(r'^`!v\s')

			`@classmethod`
			`def starts_here(cls, stream):`
			`"""Returns true if this token starts at the current position in`
			`'stream'."""`
			`return cls.CHECK.match(stream.peek(4)) is not None`

			`def _parse(self, stream, indent):`
			`for _ in range(4):`
			next(stream) # `!v
			self.code = _parse_till_unescaped_char(stream, '`')[0]

			`def __repr__(self):`
			`return 'VimLCodeToken(%r,%r,%r)' % (`
			`self.start, self.end, self.code`
			`)`


			`class EndOfTextToken(Token):`

			`"""Appears at the end of the text."""`

			`def __repr__(self):`
			`return 'EndOfText(%r)' % self.end`


			`def tokenize(text, indent, offset, allowed_tokens):`
			`"""Returns an iterator of tokens of 'text'['offset':] which is assumed to`
			`have 'indent' as the whitespace of the begging of the lines. Only`
			`'allowed_tokens' are considered to be valid tokens."""`
			`stream = _TextIterator(text, offset)`
			`try:`
			`while True:`
			`done_something = False`
			`for token in allowed_tokens:`
			`if token.starts_here(stream):`
			`yield token(stream, indent)`
			`done_something = True`
			`break`
			`if not done_something:`
			`next(stream)`
			`except StopIteration:`
			`yield EndOfTextToken(stream, indent)`