Skip to content

yex.parse.Tokeniser

yex.parse.Tokeniser(doc, source, pushback=None) #

A tokeniser takes characters from a source, such as a file, and produces tokens of the correct categories.

Then, a parser will request tokens from the tokeniser, and do something with them. Hopefully, it'll be something useful.

Source code in yex/parse/tokeniser.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
def __init__(self,
             doc: 'yex.Document',
             source: Union[TextIO, List, str],
             pushback: Union['yex.parse.Pushback', None]=None,
             ):
    r"""
    Sets up a tokeniser which reads from `source` on behalf of `doc`.

    Args:
        doc: the document we belong to. Its \catcode control is
            kept as self.catcodes, and the update method of its
            \inputlineno control is installed as the source's
            line number setter.
        source: where the characters come from. None is treated
            as the empty string. If it has a "name" attribute,
            that names the source; otherwise its repr() does.
        pushback: the pushback store to use. If None, a new
            yex.parse.Pushback is created.
    """

    self.doc = doc
    self.catcodes = doc.controls[r'\catcode']

    self.line_status = self.BEGINNING_OF_LINE

    if pushback is None:
        pushback = yex.parse.Pushback()
    self.pushback = pushback

    if source is None:
        source = ''

    # File-like sources carry their own name; anything else
    # is labelled with its repr().
    try:
        source_name = source.name
    except AttributeError:
        source_name = repr(source)

    self.source = yex.parse.Source.from_value(
            v = source,
            name = source_name,
            )

    # For convenience, some of the source's methods are
    # reachable directly on the tokeniser.
    for delegated in ('location', 'exhaust_at_eol'):
        setattr(self, delegated, getattr(self.source, delegated))

    inputlineno = doc.get_control(r'\inputlineno')
    self.source.line_number_setter = inputlineno.update

    self._iterator = self._read()

    self.incoming = Incoming(
            source = _caret_eater(self.source),
            pushback = self.pushback,
            )

correct_line_number() #

Assigns the correct line number to \inputlineno.

You only need to call this if you've already changed it temporarily: for example, by doing an \input. Otherwise, it updates automatically.

Source code in yex/parse/tokeniser.py
117
118
119
120
121
122
123
124
125
126
def correct_line_number(self) -> None:
    r"""
    Puts the source's current line number back into \inputlineno.

    This is only needed after something has changed the value
    temporarily, such as an \input. In the ordinary way of things,
    it updates automatically.

    Does nothing if the source has no line number setter,
    or if it has no line number.
    """
    setter = self.source.line_number_setter
    if setter is None:
        return

    line_number = self.source.line_number
    if line_number is None:
        return

    setter(line_number)

eat_optional_char(ch) #

If the next token stands for the given character, we eat and return it. Otherwise, no character is consumed, and we return None.

Parameters:

Name Type Description Default
ch str

the character, to check whether token.ch==ch

required

Returns:

Type Description
typing.Union[yex.parse.token.Token, None]

Token, or None.

Source code in yex/parse/tokeniser.py
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
def eat_optional_char(self, ch: str) -> Union[Token, None]:
    """
    Consumes the next token, but only if it stands for `ch`.

    The next item is read from the token iterator. If it has a "ch"
    attribute equal to `ch`, it is consumed and returned. Anything else
    is pushed back, so that nothing has been consumed.

    Args:
        ch (str): the character to compare against token.ch

    Returns:
        the matching Token, or None if the next token didn't match.
    """

    candidate = next(self._iterator)

    # Things without a "ch" attribute can never match.
    matched = hasattr(candidate, 'ch') and candidate.ch==ch

    if not matched:
        logger.debug("    -- %s: %s.ch is not %s",
                self, candidate, repr(ch))
        self.push(candidate)
        return None

    logger.debug("    -- %s: %s.ch==%s",
            self, candidate, repr(ch))
    return candidate

eat_optional_spaces() #

Eats zero or more space tokens. This is `<optional spaces>` on p264 of the TeXbook.

Returns:

Type Description
typing.List[yex.parse.token.Token]

a list of the Tokens consumed.

Source code in yex/parse/tokeniser.py
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
def eat_optional_spaces(self) -> List[Token]:
    """
    Eats zero or more space tokens.
    This is <optional spaces> on p264 of the TeXbook.

    Reading stops at the first item which is not a space token.
    If that item is None, it is dropped; otherwise it is pushed back
    so that the next reader will see it.

    Returns:
        a list of the Tokens consumed. This is always a list,
        possibly empty, even if the token iterator runs dry
        before a non-space item turns up.
    """
    result = []

    for token in self._iterator:
        if token is None:
            break

        if isinstance(token, Token) and token.is_space:
            result.append(token)
            continue

        # Not a space: it belongs to whoever reads next.
        self.push(token)
        break

    # Reached on every path, including exhaustion of the iterator,
    # so the spaces already collected are never lost.
    return result

eat_whitespace_after_control() #

Eats all the next tokens which disappear after a control-- these being spaces and newlines.

Source code in yex/parse/tokeniser.py
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
def eat_whitespace_after_control(self) -> None:
    r"""
    Absorbs the run of items which disappear after a control--
    that is, those whose category code is listed in
    Token.DISAPPEARS_AFTER_CONTROL (spaces and newlines).

    Items are taken from the pushback first, and from the incoming
    stream when the pushback returns None. We stop at the first item
    which doesn't disappear, and push it back; we also stop if
    the incoming stream gives us None.
    """
    while True:

        item = self.pushback.pop()
        if item is None:
            item = next(self.incoming)

        if item is None:
            # Nothing came from the incoming stream either.
            return

        if self._get_catcode(item) in Token.DISAPPEARS_AFTER_CONTROL:
            logger.debug("%s: whitespace after control; absorbing: %s",
                    self, item)
            continue

        logger.debug("%s: not whitespace, pushing back: %s",
                self, item)
        self.push(item)
        return

peek() #

Returns the next character to be produced by `__next__()`, but doesn't consume it. When you next call `__next__()`, or call `peek()` again, the result will be the same.

Source code in yex/parse/tokeniser.py
470
471
472
473
474
475
476
477
478
479
480
481
def peek(self) -> Any:
    """
    Looks at the next item without consuming it.

    The item is read using __next__(), and then pushed straight back.
    So the following call to __next__(), or to peek(), will produce
    the same item again.

    Returns:
        whatever __next__() would have returned.
    """

    upcoming = next(self)
    self.push(upcoming)
    return upcoming

push(thing) #

Pushes something back. Next time someone reads the tokeniser, they will get this item-- unless someone pushes something else back, which will come out first.

Parameters:

Name Type Description Default
thing typing.Any

anything you like, which gets pushed back.

required
Source code in yex/parse/tokeniser.py
75
76
77
78
79
80
81
82
83
84
def push(self, thing: Any) -> None:
    """
    Hands `thing` to our pushback, so that it will be the next item
    read from the tokeniser-- unless something else gets pushed
    back afterwards, in which case that will come out first.

    Args:
        thing: whatever you want pushed back. It can be anything.
    """
    pushback = self.pushback
    pushback.push(thing)