tools.format_save_bibs¶

Classes¶

Functions¶

format_bib_to_abbr_or_zotero_or_save_mode ¶

format_bib_to_abbr_or_zotero_or_save_mode(
    original_data, options
)

Formats bibliography data to multiple standard formats and returns as data lists.

Processes bibliography data and generates three standardized formats as string lists: abbreviated format, Zotero format, and save format.

Parameters:

Name	Type	Description	Default
`original_data`	`list[str] \| str`	Input bibliography data as list of strings or file path.	required
`options`	`dict[str, Any]`	Processing configuration options.	required

Returns:

Type	Description
`tuple[list[str], list[str], list[str]]`	Tuple containing three lists of strings representing the formatted bibliography data in abbreviated, Zotero, and save formats.

Source code in pybibtexer/tools/format_save_bibs.py

def format_bib_to_abbr_or_zotero_or_save_mode(
    original_data: list[str] | str, options: dict[str, Any]
) -> tuple[list[str], list[str], list[str]]:
    """Render bibliography data as abbreviated, Zotero, and save data lists.

    The input is first converted to a data list, then parsed into three
    libraries (abbreviated, Zotero, save), and finally each library is written
    back out as a list of strings.

    Args:
        original_data: Bibliography data, either as a list of strings or as a file path.
        options: Processing configuration. The parser and the writer each receive
            their own shallow copy, so keys added here never reach the caller's dict.

    Returns:
        A tuple of three string lists holding the formatted bibliography data in
        abbreviated, Zotero, and save formats, in that order.
    """
    # Normalise the input into a data list.
    bib_lines = transform_to_data_list(original_data, ".bib")

    # Parse into the abbreviated, Zotero, and save libraries.
    parser = PythonRunBib(dict(options))
    abbr_library, zotero_library, save_library = parser.parse_to_multi_standard_library(bib_lines)

    # Writer configuration: caller-supplied values win; otherwise entry fields
    # are sorted (True) and blocks are not sorted (False).
    # NOTE(review): the previous inline comment claimed the "is_sort_blocks"
    # default was True while the code falls back to False -- confirm the intent.
    writer_options = {
        **options,
        "is_sort_entry_fields": options.get("is_sort_entry_fields", True),
        "is_sort_blocks": options.get("is_sort_blocks", False),
    }
    writer = PythonWriters(writer_options)
    return writer.write_multi_library_to_multi_data_list(abbr_library, zotero_library, save_library)

format_bib_to_abbr_zotero_save_modes ¶

format_bib_to_abbr_zotero_save_modes(
    original_data, path_output, options
)

Formats bibliography data to multiple standard formats.

Processes bibliography data and generates three standardized formats: abbreviated format, Zotero format, and save format.

Parameters:

Name	Type	Description	Default
`original_data`	`list[str] \| str`	Input bibliography data as list of strings or file path.	required
`path_output`	`str`	Output directory path for processed files.	required
`options`	`dict[str, Any]`	Processing configuration options.	required

Returns:

Type	Description
`None`	None

Source code in pybibtexer/tools/format_save_bibs.py

def format_bib_to_abbr_zotero_save_modes(
    original_data: list[str] | str, path_output: str, options: dict[str, Any]
) -> None:
    """Write bibliography data out in abbreviated, Zotero, and save formats.

    The input is converted to a data list, parsed into three libraries
    (abbreviated, Zotero, save), and the three libraries are handed to the
    writer together with the output directory.

    Args:
        original_data: Bibliography data, either as a list of strings or as a file path.
        path_output: Output directory path for the processed files; it is passed
            through ``standard_path`` before use.
        options: Processing configuration. The parser and the writer each receive
            their own shallow copy, so keys added here never reach the caller's dict.

    Returns:
        None
    """
    output_dir = standard_path(path_output)

    # Normalise the input into a data list.
    bib_lines = transform_to_data_list(original_data, ".bib")

    # Parse into the abbreviated, Zotero, and save libraries.
    parser_options = dict(options)
    parser = PythonRunBib(parser_options)
    abbr_library, zotero_library, save_library = parser.parse_to_multi_standard_library(bib_lines)

    # Writer configuration: keep any caller-supplied value, otherwise sort entry
    # fields (True) and leave blocks unsorted (False).
    # NOTE(review): the previous inline comment claimed the "is_sort_blocks"
    # default was True while the code falls back to False -- confirm the intent.
    writer_options = dict(options)
    writer_options.setdefault("is_sort_entry_fields", True)
    writer_options.setdefault("is_sort_blocks", False)
    writer = PythonWriters(writer_options)
    writer.write_multi_library_to_multi_file(output_dir, abbr_library, zotero_library, save_library)

format_bib_to_save_mode_by_entry_type ¶

format_bib_to_save_mode_by_entry_type(
    c_j_abbr,
    path_output,
    original_data,
    combine_year_length=1,
    default_year_list=None,
    write_flag_bib="w",
    check_bib_exist=False,
    write_flag_readme="w",
    check_md_exist=False,
    options=None,
)

Formats bibliography entries and organizes them by year and type.

Processes bibliography data and organizes it into separate files by entry type and year, generating both BibTeX files and README documentation.

Parameters:

Name	Type	Description	Default
`c_j_abbr`	`str`	Conference/Journal abbreviation used for naming output files.	required
`path_output`	`str`	Output directory path for processed files.	required
`original_data`	`list[str] \| str \| Library`	Input bibliography data in various formats (list of strings, file path, file, raw string, or Library object).	required
`combine_year_length`	`int`	Number of years to combine in each output file.	`1`
`default_year_list`	`list[str] \| None`	Specific years to process (if empty, processes all years).	`None`
`write_flag_bib`	`str`	Write mode for BibTeX files ("w" for write, "a" for append).	`'w'`
`check_bib_exist`	`bool`	Whether to check if BibTeX files exist before writing.	`False`
`write_flag_readme`	`str`	Write mode for README files ("w" for write, "a" for append).	`'w'`
`check_md_exist`	`bool`	Whether to check if README files exist before writing.	`False`
`options`	`dict[str, Any] \| None`	Additional processing options.	`None`

Returns:

Type	Description
`None`	None

Source code in pybibtexer/tools/format_save_bibs.py

def format_bib_to_save_mode_by_entry_type(
    c_j_abbr: str,
    path_output: str,
    original_data: list[str] | str | Library,
    combine_year_length: int = 1,
    default_year_list: list[str] | None = None,
    write_flag_bib: str = "w",
    check_bib_exist: bool = False,
    write_flag_readme: str = "w",
    check_md_exist: bool = False,
    options: dict[str, Any] | None = None,
) -> None:
    """Split bibliography entries into per-type, per-year files plus a README.

    For every entry type found in the parsed data, the years are grouped into
    chunks of ``combine_year_length`` and each chunk is written to
    ``<path_output>/<entry type, lowercased>/bib/<c_j_abbr>_<first>[_<last>].bib``.
    A ``README.md`` for the entry type is then written to
    ``<path_output>/<entry type, lowercased>``.

    Args:
        c_j_abbr: Conference/Journal abbreviation used for naming output files.
        path_output: Output directory path; passed through ``standard_path`` before use.
        original_data: Input bibliography data (list of strings, file path, file,
            raw string, or Library object).
        combine_year_length: Number of years to combine in each output file.
        default_year_list: Years to keep; ``None`` or an empty list keeps all years.
        write_flag_bib: Write mode for BibTeX files ("w" for write, "a" for append).
        check_bib_exist: Whether to check if BibTeX files exist before writing.
        write_flag_readme: Write mode for README files ("w" for write, "a" for append).
        check_md_exist: Whether to check if README files exist before writing.
        options: Additional processing options; ``None`` means no extra options.

    Returns:
        None
    """

    def _without_spaces(text: str) -> str:
        # README lines are compared with all spaces removed.
        return re.sub(r"[ ]+", "", text)

    wanted_years = [] if default_year_list is None else default_year_list
    extra_options = {} if options is None else options

    path_output = standard_path(path_output)

    # Field and block sorting are forced on; sorting is ascending.
    shared_options = {
        **extra_options,
        "is_sort_entry_fields": True,
        "is_sort_blocks": True,
        "sort_entries_by_field_keys_reverse": False,
    }

    # The parser is created before "empty_entry_cite_keys" is added; both helpers
    # are handed the same dict object, exactly as before.
    parser = PythonRunBib(shared_options)
    shared_options["empty_entry_cite_keys"] = True
    writer = PythonWriters(shared_options)

    # Nested mapping; the first two levels are entry type and year.
    nested_entries = parser.parse_to_nested_entries_dict(original_data)

    for entry_type in nested_entries:
        entries_by_year = nested_entries[entry_type]

        # Order the years, then optionally restrict them to the requested ones.
        years = sort_int_str(list(entries_by_year.keys()))
        if wanted_years:
            years = [year for year in years if year in wanted_years]
        entries_by_year = {year: entries_by_year[year] for year in years}

        # One BibTeX file per chunk of ``combine_year_length`` consecutive years.
        bib_dir = os.path.join(path_output, entry_type.lower(), "bib")
        chunk_count = math.ceil(len(years) / combine_year_length)
        for chunk_index in range(chunk_count):
            first = chunk_index * combine_year_length
            # Slicing clamps at the end of the list, so the last chunk may be shorter.
            chunk_years = years[first : first + combine_year_length]

            # Flatten a deep copy of the chunk's entries into a list of blocks.
            chunk_dict = {year: entries_by_year[year] for year in chunk_years}
            entries: list[Block] = IterateCombineExtendDict().dict_update(copy.deepcopy(chunk_dict))

            # File name: "<abbr>_<first year>" plus "_<last year>" for multi-year chunks.
            last_year_part = f"_{chunk_years[-1]}" if len(chunk_years) > 1 else ""
            file_name = f"{c_j_abbr}_{chunk_years[0]}{last_year_part}.bib"

            writer.write_to_file(entries, file_name, write_flag_bib, bib_dir, check_bib_exist)

        # README for this entry type, covering the (filtered) years.
        readme_dir = os.path.join(path_output, entry_type.lower())
        readme_lines = generate_readme(c_j_abbr, entry_type, entries_by_year)

        # Append mode: when an existing README has content, skip the first three
        # generated lines, and drop every line already present (ignoring spaces).
        if "a" in write_flag_readme:
            existing_lines = [_without_spaces(line) for line in read_list("README.md", "r", readme_dir)]
            if existing_lines:
                readme_lines = readme_lines[3:]
            readme_lines = [line for line in readme_lines if _without_spaces(line) not in existing_lines]

        write_list(readme_lines, "README.md", write_flag_readme, readme_dir, check_md_exist)

generate_statistic_information ¶

generate_statistic_information(path_storage)

Generates statistical information from bibliography files.

Processes all BibTeX files in the directory tree and extracts key information (DOIs and URLs) into CSV files for analysis.

Parameters:

Name	Type	Description	Default
`path_storage`	`str`	Root directory containing BibTeX files to process.	required

Returns:

Type	Description
`None`	None

Source code in pybibtexer/tools/format_save_bibs.py

def generate_statistic_information(path_storage: str) -> None:
    """Generates statistical information from bibliography files.

    Walks ``path_storage`` recursively, parses every file whose name ends with
    ``.bib`` and writes one line per entry to a companion ``.csv`` file. Each
    line holds the entry's DOI, or its URL when the DOI is missing or empty,
    or is blank when neither is available.

    The CSV path is the BibTeX path with its trailing ``.bib`` extension
    replaced by ``.csv`` and every ``bib`` directory component replaced by
    ``url``.

    Args:
        path_storage: Root directory containing BibTeX files to process.

    Returns:
        None
    """
    # Find all BibTeX files in the directory tree.
    bib_files = []
    for root, _, files in os.walk(path_storage):
        bib_files.extend(os.path.join(root, name) for name in files if name.endswith(".bib"))

    # Configure processing options.
    _options = {
        "is_standardize_bib": False,  # Skip standardization, default is True.
        "choose_abbr_zotero_save": "save",  # Use save format, default is "save".
        "delete_field_list_for_save": [],  # Do not delete any fields, default is [].
        "function_common_again": False,  # Skip reprocessing, default is True.
        "function_common_again_abbr": False,  # Skip abbreviation reprocessing, default is True.
        "function_common_again_zotero": False,  # Skip Zotero reprocessing, default is True.
        "function_common_again_save": False,  # Skip save format reprocessing, default is True.
        "is_sort_entry_fields": False,  # Skip field sorting.
        "is_sort_blocks": False,  # Skip block sorting.
    }
    _python_bib = PythonRunBib(_options)

    # Process each BibTeX file.
    for bib_file in bib_files:
        library = _python_bib.parse_to_single_standard_library(bib_file)

        # Extract the DOI for each entry, falling back to the URL, then to "".
        informations = []
        for entry in library.entries:
            flag = entry["doi"] if "doi" in entry else ""
            if not flag:
                flag = entry["url"] if "url" in entry else ""
            informations.append(flag + "\n")

        # Swap only the trailing extension: a plain ``replace(".bib", ".csv")``
        # would also rewrite ".bib" occurring inside directory names.
        csv_path = bib_file.removesuffix(".bib") + ".csv"
        # NOTE(review): this still replaces every "<sep>bib<sep>" component in the
        # path, not just the innermost one -- confirm that is intended.
        csv_path = csv_path.replace(f"{os.sep}bib{os.sep}", f"{os.sep}url{os.sep}")
        write_list(informations, csv_path, "w", None, False)