Skip to content

bib.bibtexbase.standardize.do_on_entry_block

Entry block standardization utilities.

This module provides comprehensive functionality for processing and standardizing BibTeX entry blocks. Entry blocks contain the actual bibliographic data and require the most complex processing including field validation, formatting, and error checking.

Classes:

Name Description
StandardizeEntryBlock

Main class for standardizing BibTeX entry blocks with configurable field lists and comprehensive validation.

EntryBase

Base class providing utility methods for field extraction and brace/quote detection.

SplitEntry

Handles splitting of entry blocks based on field patterns.

AppendEntry

Manages field appending and line continuation processing.

ExtractEntry

Extracts and validates field content from entry blocks.

CheckEntry

Performs final validation and error checking on entry blocks.

Functions:

Name Description
add_brace_or_quote

Utility function for ensuring proper brace/quote matching in field values.

Classes

AppendEntry

AppendEntry()

Append Patch Bib.

Source code in pybibtexer/bib/bibtexbase/standardize/do_on_entry_block.py
def __init__(self) -> None:
    """Create the instance; the constructor is a no-op and sets no attributes."""
    pass

Functions

append_field staticmethod
append_field(field_list, braces_or_quotes, block)

Append.

Parameters:

Name Type Description Default
field_list list[str]

Append field list.

required
braces_or_quotes tuple[str, str]

Brace or quote.

required
block list[str]

Data list.

required

Returns:

Type Description
list[str]

list[str]: new patch bib after appending.

Source code in pybibtexer/bib/bibtexbase/standardize/do_on_entry_block.py
@staticmethod
def append_field(field_list: list[str], braces_or_quotes: tuple[str, str], block: list[str]) -> list[str]:
    """Fold continuation lines into the statement line that precedes them.

    A line starts a statement when it matches one of three case-insensitive
    patterns: a listed field followed by ``=`` and the opening delimiter,
    a listed field followed by ``=`` and a bare word (``journal = EJC,``),
    or an entry header such as ``@article{``. Every non-blank line that
    follows a statement line, up to the next statement line, is stripped
    and joined onto it with a single space; blank lines in between are
    dropped. Lines that appear before the first statement line are kept
    unchanged.

    Args:
        field_list (list[str]): Field names that may start a statement.
        braces_or_quotes (tuple[str, str]): Opening and closing delimiter;
            only the opening one is used here.
        block (list[str]): Lines of the entry block.

    Returns:
        list[str]: The block with continuation lines merged.
    """
    opener, _ = braces_or_quotes

    names = rf"[%\s]*(?:{'|'.join(field_list)})"
    statement_starts = (
        re.compile(rf"{names}\s*=\s*{opener}", flags=re.I),
        re.compile(r"\s*@[a-zA-Z]*{", flags=re.I),
        re.compile(rf"{names}\s*=\s*\w+[\w\-]*", flags=re.I),  # journal = EJC,
    )

    merged: list[str] = []
    inside_statement = False  # becomes True at the first statement line and stays True
    for line in block:
        if any(pattern.match(line) for pattern in statement_starts):
            merged.append(line)
            inside_statement = True
            continue

        if not inside_statement:
            # Nothing to attach to yet: keep the line as it is.
            merged.append(line)
            continue

        tail = line.lstrip()
        if tail:
            # Continuation text: glue it to the current statement line.
            merged[-1] = merged[-1].rstrip() + " " + tail
    return merged

CheckEntry

Functions

check staticmethod
check(field_list, brace_or_quote, block)

Check.

Source code in pybibtexer/bib/bibtexbase/standardize/do_on_entry_block.py
@staticmethod
def check(
    field_list: list[str], brace_or_quote: tuple[str, str], block: list[str]
) -> tuple[dict[str, list[str]], list[str], bool]:
    """Validate the lines of one entry block and collect the acceptable ones.

    Each line is classified in this order:

    1. The first entry header (``@type{``) is kept when the line contains
       exactly one ``{`` and exactly one ``,``; otherwise it is reported
       under ``"Failed entry_type"``.
    2. A line starting with one of the listed fields followed by ``=`` is
       passed through ``add_brace_or_quote`` and kept.
    3. The first line that is just ``}`` is kept as the closing brace.
    4. Anything else, including a second header or a second closing
       brace, is reported as redundant.

    Args:
        field_list (list[str]): Field names accepted in the block.
        brace_or_quote (tuple[str, str]): Opening and closing delimiter
            forwarded to ``add_brace_or_quote``.
        block (list[str]): Lines of the entry block.

    Returns:
        tuple[dict[str, list[str]], list[str], bool]: Rejected lines grouped
        by error label, the accepted lines, and whether both a valid header
        and a closing brace were found.
    """
    opening, closing = brace_or_quote

    header_pattern = re.compile(r"\s*@[a-zA-Z]+{")
    field_pattern = re.compile(rf"\s*(?:{'|'.join(field_list)})" + r"\s*=")

    problems: dict[str, list[str]] = {}
    accepted = []
    header_seen = False  # a valid header is accepted only once per block
    closing_seen = False  # a closing brace is accepted only once per block

    for line in block:
        if not header_seen and header_pattern.match(line):
            if line.count("{") == 1 and line.count(",") == 1:
                header_seen = True
                accepted.append(line)
            else:
                problems.setdefault("Failed entry_type", []).append(line)
            continue

        if field_pattern.match(line):
            accepted.append(add_brace_or_quote(opening, closing, line))
            continue

        if not closing_seen and line.strip() == "}":
            closing_seen = True
            accepted.append(line)
            continue

        # NOTE(review): the trailing backtick in this key looks unintended;
        # it is left as-is because callers may read the key.
        problems.setdefault("Redundant content`", []).append(line)

    return problems, accepted, header_seen and closing_seen

EntryBase

EntryBase()
Source code in pybibtexer/bib/bibtexbase/standardize/do_on_entry_block.py
def __init__(self) -> None:
    """Create the instance; the constructor is a no-op and sets no attributes."""
    pass

Functions

obtain_braces_or_quotes staticmethod
obtain_braces_or_quotes(block)

Obtain braces or quotes in block.

Parameters:

Name Type Description Default
block list[str]

block.

required

Returns:

Type Description
tuple[str, str]

tuple[str, str]: the tuple of braces or quotes.

Source code in pybibtexer/bib/bibtexbase/standardize/do_on_entry_block.py
@staticmethod
def obtain_braces_or_quotes(block: list[str]) -> tuple[str, str]:
    """Detect whether the block delimits field values with braces or quotes.

    The joined block text is probed, case-insensitively, for the character
    that follows ``=`` after the ``title``, ``author``, ``year``, ``pages``
    and ``url`` fields (a trailing run of ``s`` is tolerated). A delimiter
    pair is returned only when every probed field that occurs uses the same
    single delimiter.

    Args:
        block (list[str]): Lines of the entry block.

    Returns:
        tuple[str, str]: ``("{", "}")`` or ``('"', '"')``; ``("", "")`` when
        none of the probed fields is found or their delimiters disagree.
    """
    text = "".join(block)
    probes = (
        r'\btitles*\s*=\s*([{"])',
        r'\bauthors*\s*=\s*([{"])',
        r'\byears*\s*=\s*([{"])',
        r'\bpages*\s*=\s*([{"])',
        r'\burls*\s*=\s*([{"])',
    )

    # One set of opening delimiters per probed field that actually occurs.
    observed = []
    for probe in probes:
        openers = set(re.findall(probe, text, flags=re.I))
        if openers:
            observed.append(openers)

    if not observed:
        return "", ""
    if any(len(openers) == 2 for openers in observed):
        # A single field name appears with both delimiters.
        return "", ""
    if any(openers != observed[0] for openers in observed):
        # Different fields use different delimiters.
        return "", ""

    if "{" in observed[0]:
        return "{", "}"
    return '"', '"'
obtain_fields
obtain_fields(
    block, default_fields_list, field_pattern="[\\w\\-]+"
)

Obtain fields in block.

Parameters:

Name Type Description Default
block list[str]

block.

required
field_pattern str = r'[\w\-]+'

field pattern.

'[\\w\\-]+'

Returns:

Type Description
list[str]

list[str]: field list.

Source code in pybibtexer/bib/bibtexbase/standardize/do_on_entry_block.py
def obtain_fields(
    self, block: list[str], default_fields_list: list[str], field_pattern: str = r"[\w\-]+"
) -> list[str]:
    r"""Obtain the known field names that occur in the block.

    The joined block text is searched for ``<name> = <flag>``, where
    ``<name>`` matches ``field_pattern`` and ``<flag>`` is any alternative
    from the module-level ``FIELD_FORMAT_FLAG``. A found name is kept, in
    its original spelling, when its lowercase form is listed in
    ``default_fields_list``.

    Args:
        block (list[str]): block.
        default_fields_list (list[str]): accepted field names, compared
            against the lowercase form of each found name.
        field_pattern (str = r'[\w\-]+'): field pattern.

    Returns:
        list[str]: sorted field list without duplicates.
    """
    # support for abbreviation
    finder = re.compile(rf"({field_pattern})\s*=\s*(?:{'|'.join(FIELD_FORMAT_FLAG)})")
    text = "".join(block)
    known = {name for name in finder.findall(text) if name.lower() in default_fields_list}
    return sorted(known)

ExtractEntry

ExtractEntry()
Source code in pybibtexer/bib/bibtexbase/standardize/do_on_entry_block.py
def __init__(self) -> None:
    """Create the instance; the constructor is a no-op and sets no attributes."""
    pass

Functions

extract
extract(field_list, brace_or_quote, block)

Extract.

Parameters:

Name Type Description Default
field_list list[str]

field list

required
brace_or_quote tuple[str, str]

(", ") or ({, })

required
block list[str]

the block

required
Return

tuple[list[str], list[str]]: main block, redundant part

Source code in pybibtexer/bib/bibtexbase/standardize/do_on_entry_block.py
def extract(
    self, field_list: list[str], brace_or_quote: tuple[str, str], block: list[str]
) -> tuple[list[str], list[str]]:
    """Split each line of the block into its main part and redundant tail.

    Lines are tried against these forms, in order:

    1. Entry header (``@type{key``) or abbreviation field
       (``field = word``): the matched head is kept and terminated with
       a comma and newline.
    2. Field whose value is closed on the same line: the head up to the
       last closing delimiter is passed through
       ``self._resub_brace_or_quote``.
    3. Field whose value is opened but not closed: the closing delimiter
       is appended before passing it through
       ``self._resub_brace_or_quote``.
    4. A line that is just ``}`` is skipped.

    For forms 1 and 2, text after the head is reported as redundant when
    it contains anything other than whitespace, commas and ``}``. Any line
    matching none of the forms aborts the extraction.

    Args:
        field_list (list[str]): field list
        brace_or_quote (tuple[str, str]): (", ") or ({, })
        block (list[str]): the block

    Return:
        tuple[list[str], list[str]]: main block, redundant part. On an
        unrecognised line the result is an empty main block and the whole
        input block as the redundant part.
    """
    opener, closer = brace_or_quote

    names = rf"[%\s]*(?:{'|'.join(field_list)})"
    header_re = re.compile(r"(\s*@[a-zA-Z]*{\s*[\w\-:/\\.\']*)(.*)(\n*)", flags=re.I)
    abbr_re = re.compile(rf"({names}\s*=\s*\w+[\w\-]*)(.*)(\n*)", flags=re.I)
    closed_re = re.compile(rf"({names}\s*=\s*{opener}.*{closer})(.*)(\n*)", flags=re.I)
    open_re = re.compile(rf"({names}\s*=\s*{opener})(.*)(\n*)", flags=re.I)

    def leftover(rest: str, newlines: str) -> str:
        # The tail is redundant only if it holds more than separators.
        if re.sub(r"[\s,\n\}]+", "", rest):
            return rest + newlines
        return ""

    kept: list[str] = []
    extras: list[str] = []

    for line in block:
        # Headers and abbreviation fields are handled the same way.
        if hit := header_re.match(line) or abbr_re.match(line):
            head, rest, newlines = hit.groups()
            cleaned = head + ",\n"
            spare = leftover(rest, newlines)
        elif hit := closed_re.match(line):
            head, rest, newlines = hit.groups()
            cleaned = self._resub_brace_or_quote(opener, closer, head + ",\n")
            spare = leftover(rest, newlines)
        elif hit := open_re.match(line):
            head, rest, _ = hit.groups()
            # The value was never closed on this line: close it here.
            cleaned = self._resub_brace_or_quote(opener, closer, head + rest.strip() + closer + ",\n")
            spare = ""
        elif line.strip() == "}":
            # The closing brace is re-added once after the loop.
            continue
        else:
            return [], block

        if cleaned:
            kept.append(cleaned)
        if spare:
            extras.append(spare)

    # for enclosing "@[a-zA-Z]{"
    if kept:
        kept.append("}\n")
    return kept, extras

StandardizeEntryBlock

StandardizeEntryBlock(default_additional_field_list=None)

Standardize entry block.

Parameters:

Name Type Description Default
default_additional_field_list list[str] | None = None

Additional default fields.

None

Attributes:

Name Type Description
default_fields_list list[str]

Default fields.

Source code in pybibtexer/bib/bibtexbase/standardize/do_on_entry_block.py
def __init__(self, default_additional_field_list: list[str] | None = None) -> None:
    """Build the list of accepted field names.

    The module-level ``DEFAULT_FIELDS_LIST`` and the optional additional
    names are lowercased, stripped of surrounding whitespace and merged
    without duplicates into ``self.default_fields_list``.

    Args:
        default_additional_field_list (list[str] | None = None): Additional
            default fields; ``None`` means no additional fields.
    """
    extra_names = [] if default_additional_field_list is None else default_additional_field_list

    builtin_names = {name.lower().strip() for name in DEFAULT_FIELDS_LIST}
    additional_names = {name.lower().strip() for name in extra_names}
    # The list comes from a set union, so its order is whatever set iteration yields.
    self.default_fields_list = list(builtin_names.union(additional_names))

Functions