Skip to content

main.basic_input

Classes

BasicInput

BasicInput(options)

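Basic input: loads journal and conference abbreviation data and pre-compiles the name-matching patterns used by later processing.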

Parameters:

Name Type Description Default
options dict[str, Any]

Configuration dictionary containing processing parameters.

required

Attributes:

Name Type Description
full_abbr_article_dict dict[str, str]

Merged journal abbreviation dictionary (built-in special, BibLaTeX-derived, built-in default and user-provided entries).

full_abbr_inproceedings_dict dict[str, str]

Merged conference abbreviation dictionary (built-in special, BibLaTeX-derived, built-in default and user-provided entries).

full_names_in_json str

Key under which full names are stored in the JSON data (`names_full`).

abbr_names_in_json str

Key under which abbreviated names are stored in the JSON data (`names_abbr`).

abbr_article_pattern_dict dict

Pre-compiled regex patterns for journal name matching

abbr_inproceedings_pattern_dict dict

Pre-compiled regex patterns for conference name matching

full_biblatex_bib str

Path to the BibLaTeX file

options dict[str, Any]

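The configuration dictionary passed at construction, extended in place with the merged abbreviation dictionaries, the JSON key names and the pre-compiled patterns.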

Notes

full_json_c (str): User-provided JSON file containing conference data.
full_json_j (str): User-provided JSON file containing journal data.

Initialize the processor with configuration options.

Parameters:

Name Type Description Default
options dict[str, Any]

Configuration dictionary containing processing parameters

required
Source code in pybibtexer/main/basic_input.py
def __init__(self, options: dict[str, Any]) -> None:
    """Build the abbreviation tables and matching patterns and store them in ``options``.

    Abbreviation data comes from four sources. When the same key appears in
    several of them, the entry kept is chosen in this order:
    user JSON > built-in defaults > BibLaTeX-derived > built-in specials.

    Args:
        options: Configuration dictionary containing processing parameters.
            The keys ``full_json_c``, ``full_json_j`` and ``full_biblatex_bib``
            are read (each falls back to ``""``). The dictionary is updated in
            place with the merged tables, the JSON key names and the compiled
            patterns, and is then kept as ``self.options``.
    """
    load_builtin = self._process_build_in_json

    # Built-in templates: special-case entries first, then the regular defaults.
    special_c = load_builtin("conferences_special.json")
    special_j = load_builtin("journals_special.json")
    default_c = load_builtin("conferences.json")
    default_j = load_builtin("journals.json")

    # Entries supplied by the user through JSON files named in the options.
    user_c, user_j = process_user_conferences_journals_json(
        options.get("full_json_c", ""),
        options.get("full_json_j", ""),
    )

    # Later mappings win on key clashes, so user entries override defaults.
    user_over_default_j = {**default_j, **user_j}
    user_over_default_c = {**default_c, **user_c}

    # TODO: decide whether to run CheckAcronymAbbrAndFullDict's duplicate
    # acronym/abbreviation check on the two merged dictionaries here.

    # Entries derived from the BibLaTeX file named in the options.
    bib_c, bib_j = self._process_biblatex(options.get("full_biblatex_bib", ""))

    # Final merge (lowest precedence listed first), wrapped in StrictOrderedDict.
    article_table = StrictOrderedDict({**special_j, **bib_j, **user_over_default_j})
    inproceedings_table = StrictOrderedDict({**special_c, **bib_c, **user_over_default_c})

    # Names of the JSON fields holding full and abbreviated names.
    full_key, abbr_key = "names_full", "names_abbr"

    # Compile the per-abbreviation regex patterns once, up front.
    article_patterns, inproceedings_patterns = self.abbr_article_inproceedings_pattern(
        article_table, inproceedings_table, full_key, abbr_key
    )

    # Publish everything through ``options`` so other methods can pick it up.
    for key, value in (
        ("full_abbr_article_dict", article_table),
        ("full_abbr_inproceedings_dict", inproceedings_table),
        ("full_names_in_json", full_key),
        ("abbr_names_in_json", abbr_key),
        ("abbr_article_pattern_dict", StrictOrderedDict(article_patterns)),
        ("abbr_inproceedings_pattern_dict", StrictOrderedDict(inproceedings_patterns)),
    ):
        options[key] = value

    self.options = options

Functions

abbr_article_inproceedings_pattern staticmethod
abbr_article_inproceedings_pattern(
    full_abbr_article_dict,
    full_abbr_inproceedings_dict,
    full_names_in_json,
    abbr_names_in_json,
)

Pre-compile regex patterns for journal and conference name matching.

Parameters:

Name Type Description Default
full_abbr_article_dict StrictOrderedDict

dictionary containing journal abbreviations and their full names

required
full_abbr_inproceedings_dict StrictOrderedDict

dictionary containing conference abbreviations and their full names

required
full_names_in_json str

Key for full names in the dictionary

required
abbr_names_in_json str

Key for abbreviation names in the dictionary

required

Returns:

Type Description
tuple[dict[str, dict[str, Any]], dict[str, dict[str, Any]]]

Tuple of two dictionaries containing pre-compiled regex patterns for journals and conferences

Source code in pybibtexer/main/basic_input.py
@staticmethod
def abbr_article_inproceedings_pattern(
    full_abbr_article_dict: StrictOrderedDict,
    full_abbr_inproceedings_dict: StrictOrderedDict,
    full_names_in_json: str,
    abbr_names_in_json: str,
) -> tuple[dict[str, dict[str, Any]], dict[str, dict[str, dict[str, Any]]]]:
    """Pre-compile regex patterns for journal and conference name matching.

    Args:
        full_abbr_article_dict: dictionary containing journal abbreviations and their full names
        full_abbr_inproceedings_dict: dictionary containing conference abbreviations and their full names
        full_names_in_json: Key for full names in the dictionary
        abbr_names_in_json: Key for abbreviation names in the dictionary

    Returns:
        Tuple of two dictionaries containing pre-compiled regex patterns for journals and conferences
    """

    def _create_pattern_dict(abbr_dict: StrictOrderedDict) -> dict[str, dict[str, Any]]:
        """Helper function to create pattern dictionary for a given abbreviation dictionary."""
        pattern_dict = {}
        for abbr, abbr_info in abbr_dict.items():
            # Get all name variations and combine with abbreviation
            full_names = abbr_info.get(full_names_in_json, [])
            long_abbrs = abbr_info.get(abbr_names_in_json, [])
            all_names = [*full_names, *long_abbrs, abbr]
            all_names = [m.lower() for m in all_names]

            # Create pre-compiled regex pattern for exact matching
            pattern_dict[abbr] = {
                "pattern": re.compile(rf"^({'|'.join(all_names)})$", flags=re.I),
                "names": all_names,
            }
        return pattern_dict

    abbr_article_pattern_dict = _create_pattern_dict(full_abbr_article_dict)
    abbr_inproceedings_pattern_dict = _create_pattern_dict(full_abbr_inproceedings_dict)

    return abbr_article_pattern_dict, abbr_inproceedings_pattern_dict

Functions