LibreCalc rephrases my functions made by Python

ghostmize · March 6, 2024, 12:27pm

Hey all,

I am using a python script to sum various values between various sheets in Calc.
It creates a new sheet where it adds up those cells into a new column via function, so it stays dynamic.

In my python script I added logs so I can verify the function is correct, here are some examples:
Added: the chronics with formula =SUM(2024.C49)
Added: elad magdasi featuring lucinee with formula =SUM(2024.C18)
Added: alfredo mazzilli with formula =SUM(2024.C5)

Seems right.
However, when opening the file in LibreCalc, I get #NAME? errors for all, and upon checking the functions, I see this:
=SUM(2024.c43)

It turned all my capital “C” to lower case “c” and that’s why it doesn’t work.

Why is that happening? how can I overcome it? my script is correct.

Thanks a lot in advance

mikekaganski · March 6, 2024, 12:32pm

Likely not, otherwise it would work. But impossible to say without seeing it.

ghostmize · March 6, 2024, 12:48pm

Here is the code creating the function:

    sheet_names = [sheet_name for sheet_name in wb.sheetnames if sheet_name not in ["Master", "Track Info"]]

    for sheet_name in sheet_names:
        sheet = wb[sheet_name]
        artists = set(sheet.cell(row=i, column=1).value for i in range(2, sheet.max_row + 1))

        for artist in artists:
            artist_row = next((row for row in range(2, sheet.max_row + 1) if sheet.cell(row=row, column=1).value == artist), None)
            if artist_row is None:
                continue

            artist_row_master = next((master_row for master_row in range(2, ws.max_row + 1) if ws.cell(row=master_row, column=1).value == artist), None)

            if artist_row_master is None:
                formula = f"=SUM({sheet_name}.C{artist_row})"
                ws.append([artist, formula])
                print(f"Added: {artist} with formula {formula}")
            else:
                existing_formula = ws.cell(row=artist_row_master, column=2).value
                new_formula = existing_formula + f", {sheet_name}.C{artist_row}" if existing_formula.startswith('=SUM(') else f"=SUM({sheet_name}.C{artist_row})"
                ws.cell(row=artist_row_master, column=2).value = new_formula
                print(f"Updated: {artist} with formula {new_formula}")

C is capitalized, while when opening the file in Libre it’s lowercased.

ghostmize · March 6, 2024, 12:49pm

and for wider reference, here is the entire script. Worth mentioning, I know there is a part there that lower cases the imported reports, but it doesn’t affect the functions writing. Moreover even if I remove the lower casing, it’s still results in the same issue.

import subprocess
import os
import datetime
import pandas as pd
from openpyxl.styles import PatternFill
from openpyxl.utils.dataframe import dataframe_to_rows

import openpyxl
from openpyxl import load_workbook


def main():
    # Create a new directory for the sales reports based on the current date and time
    dir_name = "FLR_Reports_" + datetime.datetime.now().strftime("%Y")
    os.makedirs(dir_name, exist_ok=True)
    print(f"Created directory: {dir_name}")

    # Determine the current year for file naming
    current_year = datetime.datetime.now().year

    # Define the paths and file names for the other scripts
    script1_path = 'triplevision_sales.py'
    script2_path = 'bandcamp_sales.py'
    report1_filename = f'triplevision_{current_year}.xlsx'
    report2_filename = f'bandcamp_{current_year}.xlsx'

    # Full paths for the reports
    report1_path = os.path.join(dir_name, report1_filename)
    report2_path = os.path.join(dir_name, report2_filename)

    # Run the first script and pass the directory and file name as arguments
    print("Running Triplevision sales script...")
    subprocess.run(['python', script1_path, dir_name, report1_filename], check=True)

    # Run the second script and pass the directory and file name as arguments
    print("Running Bandcamp sales script...")
    subprocess.run(['python', script2_path, dir_name, report2_filename], check=True)

    # Combine the reports
    combine_reports(report1_path, report2_path, dir_name)
    deduct_production_costs(os.path.join(dir_name, 'FINAL_combined_artist_revenue_report.xlsx'))

    # After updating the Excel file with new data
    format_excel_sheet(os.path.join(dir_name, 'FINAL_combined_artist_revenue_report.xlsx'), str(current_year))

    # Add the new function call here
    update_master_sheet(os.path.join(dir_name, 'FINAL_combined_artist_revenue_report.xlsx'))

    print("All tasks completed.")


def format_excel_sheet(report_path, sheet_name):
    wb = load_workbook(report_path)
    sheet = wb[sheet_name]

    # Define fills
    green_fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
    orange_fill = PatternFill(start_color="FFEB9C", end_color="FFEB9C", fill_type="solid")

    # Assuming the first row contains headers
    headers = [cell.value for cell in sheet[1]]

    # Finding the indices for the columns to be formatted
    to_be_paid_col = headers.index('To Be Paid') + 1  # +1 because openpyxl is 1-indexed
    production_costs_col = headers.index('Production Costs Deducted') + 1

    # Auto-adjust column widths and apply coloring
    for col in sheet.columns:
        max_length = max(len(str(cell.value)) for cell in col)
        sheet.column_dimensions[col[0].column_letter].width = max_length + 2  # Adding a bit extra space

        # Apply green fill to 'To Be Paid'
        if col[0].column == to_be_paid_col:
            for cell in col[1:]:  # Skip header
                cell.fill = green_fill

        # Apply orange fill to 'Production Costs Deducted'
        if col[0].column == production_costs_col:
            for cell in col[1:]:  # Skip header
                if cell.value == 'Yes':
                    cell.fill = orange_fill

    wb.save(report_path)

def deduct_production_costs(combined_report_path):
    # Load the Excel file
    with pd.ExcelFile(combined_report_path) as xls:
        track_info_df = pd.read_excel(xls, 'Track Info')
        current_year = datetime.datetime.now().year
        try:
            yearly_df = pd.read_excel(xls, sheet_name=str(current_year))
        except ValueError:  # In case the sheet for the current year doesn't exist yet
            yearly_df = pd.DataFrame()

    # Check if the indication column exists, if not add it
    if 'Production Costs Deducted' not in yearly_df.columns:
        yearly_df['Production Costs Deducted'] = 'No'  # Default to 'No' for all artists initially

    # Filter for new tracks added this year
    new_tracks = track_info_df[track_info_df['Year Added'] == current_year]

    # Mark the indication for production costs to be deducted for artists with new tracks
    for _, track in new_tracks.iterrows():
        artist = track['Asset Artist']
        if artist in yearly_df['Asset Artist'].values:
            yearly_df.loc[yearly_df['Asset Artist'] == artist, 'Production Costs Deducted'] = 'Yes'

    # Save the updated data back to the Excel file
    with pd.ExcelWriter(combined_report_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
        yearly_df.to_excel(writer, sheet_name=str(current_year), index=False)
        track_info_df.to_excel(writer, sheet_name='Track Info', index=False)

    print("Indication for production costs deduction updated for new tracks.")


def combine_reports(report1_path, report2_path, dir_name):
    # Load the reports
    report1 = pd.read_excel(report1_path)
    report2 = pd.read_excel(report2_path)

    # Convert 'Asset Artist' column to lowercase for case-insensitive comparison
    report1['Asset Artist'] = report1['Asset Artist'].str.lower()
    report2['Asset Artist'] = report2['Asset Artist'].str.lower()

    # Combine the main reports
    combined_report = pd.concat([report1, report2]).groupby('Asset Artist', as_index=False).agg({
        'Sale Count': 'sum',
        'Total Revenue': 'sum',
        'To Be Paid': 'sum'
    })

    # Determine the file path for the combined report
    combined_report_path = os.path.join(dir_name, 'FINAL_combined_artist_revenue_report.xlsx')

    # Determine the current year for sheet naming
    current_year = datetime.datetime.now().year

    if not os.path.exists(combined_report_path):
        # If the file does not exist, simply write the new file without specifying if_sheet_exists
        with pd.ExcelWriter(combined_report_path, engine='openpyxl', mode='w') as writer:
            combined_report.to_excel(writer, sheet_name=str(current_year), index=False)
        print(f"Created new report and saved to {combined_report_path} in sheet {current_year}")
    else:
        # If the file exists, use append mode and specify if_sheet_exists
        with pd.ExcelWriter(combined_report_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
            combined_report.to_excel(writer, sheet_name=str(current_year), index=False)
        print(f"Updated existing report and saved to {combined_report_path} in sheet {current_year}")

    # Load "Track Info" sheets from the new reports, if they exist
    try:
        track_info1 = pd.read_excel(report1_path, sheet_name='Track Info')
    except ValueError:
        track_info1 = pd.DataFrame()

    try:
        track_info2 = pd.read_excel(report2_path, sheet_name='Track Info')
    except ValueError:
        track_info2 = pd.DataFrame()

    # Convert 'Asset Artist' column to lowercase for case-insensitive comparison
    track_info1['Asset Artist'] = track_info1['Asset Artist'].str.lower()
    track_info2['Asset Artist'] = track_info2['Asset Artist'].str.lower()

    # Mark new tracks with the current year
    current_year = datetime.datetime.now().year
    if not track_info1.empty:
        track_info1['Year Added'] = current_year
    if not track_info2.empty:
        track_info2['Year Added'] = current_year

    combined_track_info = pd.concat([track_info1, track_info2]).drop_duplicates(subset=['Asset Artist', 'Asset Title'], keep='first').reset_index(drop=True)

    combined_report_path = os.path.join(dir_name, 'FINAL_combined_artist_revenue_report.xlsx')

    # Check if the combined report already exists to merge with existing "Track Info"
    if 'Track Info' in pd.ExcelFile(combined_report_path).sheet_names:
        existing_track_info = pd.read_excel(combined_report_path, sheet_name='Track Info')
        # Convert 'Asset Artist' column to lowercase for case-insensitive comparison
        existing_track_info['Asset Artist'] = existing_track_info['Asset Artist'].str.lower()
        # Combine new track info with existing, ensuring older "Year Added" is preserved
        combined_track_info = pd.concat([existing_track_info, combined_track_info]).drop_duplicates(subset=['Asset Artist', 'Asset Title'], keep='first').reset_index(drop=True)

    # Save updated "Track Info" back to the combined report
    with pd.ExcelWriter(combined_report_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
        combined_track_info.to_excel(writer, sheet_name='Track Info', index=False)

    print("Track Info combined and added to the report.")


def update_master_sheet(report_path):
    # Load the Excel file
    wb = load_workbook(report_path)

    # Create or overwrite the "Master" sheet
    if "Master" in wb.sheetnames:
        ws = wb["Master"]
        ws.delete_rows(2, ws.max_row)  # Clear existing data except headers
    else:
        ws = wb.create_sheet("Master")
        ws.append(["Asset Artist", "To Be Paid"])  # Add headers

    sheet_names = [sheet_name for sheet_name in wb.sheetnames if sheet_name not in ["Master", "Track Info"]]

    for sheet_name in sheet_names:
        sheet = wb[sheet_name]
        artists = set(sheet.cell(row=i, column=1).value for i in range(2, sheet.max_row + 1))

        for artist in artists:
            artist_row = next((row for row in range(2, sheet.max_row + 1) if sheet.cell(row=row, column=1).value == artist), None)
            if artist_row is None:
                continue

            artist_row_master = next((master_row for master_row in range(2, ws.max_row + 1) if ws.cell(row=master_row, column=1).value == artist), None)

            if artist_row_master is None:
                formula = f"=SUM({sheet_name}.C{artist_row})"
                ws.append([artist, formula])
                print(f"Added: {artist} with formula {formula}")
            else:
                existing_formula = ws.cell(row=artist_row_master, column=2).value
                new_formula = existing_formula + f", {sheet_name}.C{artist_row}" if existing_formula.startswith('=SUM(') else f"=SUM({sheet_name}.C{artist_row})"
                ws.cell(row=artist_row_master, column=2).value = new_formula
                print(f"Updated: {artist} with formula {new_formula}")

    wb.save(report_path)
    print("Master sheet update process completed with logging.")

if __name__ == "__main__":
    main()

mikekaganski · March 6, 2024, 12:51pm

Are you using Calc formula syntax with Pandas, where Excel syntax is expected (with ! between sheet name and address)?

JohnSUN · March 6, 2024, 12:54pm

Wrap sheet_name in single quotes.
By the way, I’m not sure that the SUM() function for a single cell is different from the usual ='2024'.C43

ghostmize · March 6, 2024, 1:04pm

It didn’t work when I tried with !, only works with a dot and a capital letter for the sheet.

I am using Calc not Excel

ghostmize · March 6, 2024, 1:06pm

It can also collect values from multiple cells, in case they exist in multiple sheets.

mikekaganski · March 6, 2024, 1:06pm

It doesn’t matter; what does is (1) what Pandas does / expects, and (2) the file format. Because Calc will expect Excel formulas in Excel files, and what it expects in its own ODS files depends on what setting is used.

JohnSUN · March 6, 2024, 1:08pm

Well, in this log that you showed in your question there was not a single example for this. I should have asked.

ghostmize · March 6, 2024, 1:19pm

True I just didn’t want to complicate things, as first I tried to get the single-ones to work, then I’d move forward.

ghostmize · March 6, 2024, 1:20pm

I think I don’t follow, I’m not 100% code expert here, what does it mean I should do?

mikekaganski · March 6, 2024, 1:24pm

And I don’t know Pandas. I have just a gut feeling that Pandas expects you to construct formulas in some different way … I google quickly, and see that they create formulas not like “{sheet}.C{number}”, but like df['Price'] = ((df['Total Value (Lacs)']) * 100000 / (df['Quantity']) * 100) (from here)

ghostmize · March 6, 2024, 2:00pm

It’s as if nothing is good enough for Calc.
If it shows in Calc like this from the script:
=(‘2024’.C17)
I get Error 501 (though it should work no?)

Then if I manually edit it to this by removing the ’ ':
=(2024.C17)
Then it works, though when I double click to edit it again, it shows like this with the ’ ’ again: (and still working)
=(‘2024’.C17)

What is that?

sokol92 · March 6, 2024, 4:15pm

Could you upload the resulting file?

ghostmize · March 6, 2024, 4:39pm

it’s a rather private revenue report, maybe I can send you privately?

sokol92 · March 6, 2024, 4:51pm

Yes, sure!

ghostmize · March 7, 2024, 1:03pm

Hey all!

Finally got the solution thanks to sokol92

I was trying to use Python with the LibreOffice syntax, while I should have done it with the Excel syntax.

This is the revised code which works:

def update_master_sheet(report_path):
    # Load the Excel file
    wb = load_workbook(report_path)

    # Create or overwrite the "Master" sheet
    if "Master" in wb.sheetnames:
        ws = wb["Master"]
        ws.delete_rows(2, ws.max_row)  # Clear existing data except headers
    else:
        ws = wb.create_sheet("Master")
        ws.append(["Asset Artist", "To Be Paid"])  # Add headers

    sheet_names = [sheet_name for sheet_name in wb.sheetnames if sheet_name not in ["Master", "Track Info"]]

    for sheet_name in sheet_names:
        sheet = wb[sheet_name]
        artists = set(sheet.cell(row=i, column=1).value for i in range(2, sheet.max_row + 1))

        for artist in artists:
            artist_row = next((row for row in range(2, sheet.max_row + 1) if sheet.cell(row=row, column=1).value == artist), None)
            if artist_row is None:
                continue

            artist_row_master = next((master_row for master_row in range(2, ws.max_row + 1) if ws.cell(row=master_row, column=1).value == artist), None)

            if artist_row_master is None:
                cells_formula = [f"'{sheet_name}'!C{artist_row}"]
            else:
                existing_formula = ws.cell(row=artist_row_master, column=2).value
                if existing_formula.startswith('=SUM('):
                    cells_formula = existing_formula[5:-1].split(", ")  # Extract existing cell references
                else:
                    cells_formula = [existing_formula[5:-1]]  # Extract existing cell reference

                cells_formula.append(f"'{sheet_name}'!C{artist_row}")  # Add current cell reference

            new_formula = f"=SUM({', '.join(cells_formula)})"
            if len(cells_formula) > 1:
                new_formula = f"=SUM({', '.join(cells_formula)})"
            ws.append([artist, new_formula])
            print(f"Updated: {artist} with formula {new_formula}")

    wb.save(report_path)
    print("Master sheet update process completed with logging.")

So basically when I set the formula to use !cellnumber instead of .cellnumber, it opened correctly and then LibreOffice converted it to .cellnumber

Thanks again everyone