Skip to content

tools.compare.compare_bibs

Classes

Functions

compare_bibs_with_local

compare_bibs_with_local(
    original_data,
    path_spidered_bibs,
    path_spidering_bibs,
    path_output,
    options,
)

Compare bibliography entries with local bibliography collections.

Processes original bibliography data and compares it against local bib files, categorizing entries into found, not found, and duplicate categories. Results are written to separate output files.

Parameters:

Name Type Description Default
original_data list[str] | str

Input bibliography data as string or list of strings

required
path_spidered_bibs str

Path to pre-collected/spidered bibliography files

required
path_spidering_bibs str

Path to actively spidered bibliography files

required
path_output str

Output directory for result files

required
options dict[str, Any]

Configuration options for comparison behavior

compare_each_entry_with_all_local_bibs: Whether to compare each entry with all local bib files.

required
Source code in pybibtexer/tools/compare/compare_bibs.py
def compare_bibs_with_local(
    original_data: list[str] | str,
    path_spidered_bibs: str,
    path_spidering_bibs: str,
    path_output: str,
    options: dict[str, Any],
) -> None:
    """Match the entries of a bibliography against the local bib collections.

    The input is parsed into a single library, its entries are looked up in
    the local bib files, and four files are written into ``path_output``:

    * ``in_local_entries.bib``: entries reported as found locally.
    * ``not_in_local_entries.bib``: entries reported as not found.
    * ``duplicate_original_entries.bib`` and ``duplicate_searched_entries.bib``:
      the two duplicate lists returned by the local comparison.

    The first two files are written with block sorting enabled and the cite
    keys of the input passed as the sort reference; the duplicate files are
    written with block sorting disabled.

    Args:
        original_data: Input bibliography data as string or list of strings
        path_spidered_bibs: Path to pre-collected/spidered bibliography files
        path_spidering_bibs: Path to actively spidered bibliography files
        path_output: Output directory for result files
        options: Configuration options for comparison behavior

            compare_each_entry_with_all_local_bibs: Whether to compare each entry with all local bib files.
    """
    path_output = standard_path(path_output)

    # Parse the input with a private copy of the caller's options.
    parser = PythonRunBib(dict(options))
    library = parser.parse_to_single_standard_library(transform_to_data_list(original_data, ".bib"))
    cite_key_order = [block.key for block in library.entries]

    # Either place every entry under the single "arXiv" group, or let the
    # helper build the groups and report the entries it could not place.
    if not options.get("compare_each_entry_with_all_local_bibs"):
        grouped_entries, missing_entries = generate_abbr_key_entry_dict(library, options)
    else:
        grouped_entries: dict[str, dict[str, Block]] = {"arXiv": {block.key: block for block in library.entries}}
        missing_entries = []

    # Look the grouped entries up in the local bib files.
    found_entries, unmatched_entries, duplicate_originals, duplicate_matches = _compare_with_local(
        grouped_entries, path_spidered_bibs, path_spidering_bibs, options
    )
    missing_entries.extend(unmatched_entries)

    # Found / not-found results: blocks sorted, original cite keys given as reference.
    ordered_writer = PythonWriters(
        {
            "is_sort_entry_fields": True,  # default is True
            "is_sort_blocks": True,  # default is True
            "sort_entries_by_cite_keys": cite_key_order,
        }
    )
    for entries, file_name in (
        (found_entries, "in_local_entries.bib"),
        (missing_entries, "not_in_local_entries.bib"),
    ):
        ordered_writer.write_to_file(entries, file_name, "w", path_output, False)

    # Duplicate reports: block sorting switched off.
    unordered_writer = PythonWriters(
        {
            "is_sort_entry_fields": True,  # default is True
            "is_sort_blocks": False,  # default is True
        }
    )
    for entries, file_name in (
        (duplicate_originals, "duplicate_original_entries.bib"),
        (duplicate_matches, "duplicate_searched_entries.bib"),
    ):
        unordered_writer.write_to_file(entries, file_name, "w", path_output, False)

compare_bibs_with_zotero

compare_bibs_with_zotero(
    zotero_bib, download_bib, path_output, options
)

Compare downloaded bibliography entries with Zotero library entries.

Processes both Zotero export and downloaded bibliography files, then compares them to identify entries that exist only in the download set versus entries that exist in both collections.

Parameters:

Name Type Description Default
zotero_bib list[str] | str

Zotero exported bibliography data as string or list of strings

required
download_bib list[str] | str

Downloaded bibliography data as string or list of strings

required
path_output str

Output directory path for result files

required
options dict[str, Any]

Configuration options for parsing and comparison behavior

required
Source code in pybibtexer/tools/compare/compare_bibs.py
def compare_bibs_with_zotero(
    zotero_bib: list[str] | str, download_bib: list[str] | str, path_output: str, options: dict[str, Any]
) -> None:
    """Split downloaded bibliography entries by whether Zotero already has them.

    Both inputs are parsed into standard libraries. Each downloaded entry is
    then checked against the Zotero entries on the ``title`` field, stopping
    at the first match. Two files are written into ``path_output``:

    * ``only_in_download.bib``: downloaded entries with no Zotero match.
    * ``in_download_and_zotero.bib``: downloaded entries that have a match.

    Args:
        zotero_bib: Zotero exported bibliography data as string or list of strings
        download_bib: Downloaded bibliography data as string or list of strings
        path_output: Output directory path for result files
        options: Configuration options for parsing and comparison behavior
    """
    path_output = standard_path(path_output)

    # Zotero export: parsed with cite-key generation switched off.
    zotero_parser = PythonRunBib(dict(options, generate_entry_cite_keys=False))  # default is False
    zotero_library = zotero_parser.parse_to_single_standard_library(transform_to_data_list(zotero_bib, ".bib"))

    # Downloaded data: parsed with cite-key generation switched on.
    download_parser = PythonRunBib(dict(options, generate_entry_cite_keys=True))  # default is False
    download_library = download_parser.parse_to_single_standard_library(transform_to_data_list(download_bib, ".bib"))

    # Classify every downloaded entry; any() stops at the first title match.
    download_only, shared = [], []
    for candidate in download_library.entries:
        has_match = any(
            check_equal_for_entry(known, candidate, ["title"], None) for known in zotero_library.entries
        )
        if has_match:
            shared.append(candidate)
        else:
            download_only.append(candidate)

    # Write both result files with a copy of the caller's options.
    writer = PythonWriters(dict(options))
    for entries, file_name in (
        (download_only, "only_in_download.bib"),
        (shared, "in_download_and_zotero.bib"),
    ):
        writer.write_to_file(entries, file_name, "w", path_output, False)