Radoslaw Poprawski
Active Member
- Joined
- Jun 19, 2021
- Messages
- 405
- Office Version
- 365
- Platform
- Windows
Hi,
So I am in bog need for fast help - hence solo searching is not an option in this case.
So I have array with numeric codes:
Then in this workbook I have 4 tables:
Sheet1: Table1
Sheet2: Table2
Sheet3: Table3
Sheet4: Table4
Now each Table has a column named "CompanyCode"
Now what I need is for each company code I need 1 new workbook with 4 sheets, 1 sheet per Table1 thru 4 filtered to specific Company code.
The result: 22 files, with 4 sheets each 1 sheet per Table.
Now below is my python code, and for some reason it works on a test file, but does not work on the main file I wanted to work.
the issue with this code is that it is not finding the headers in the file i need to work on, but find correctly headers on a test file.
For now I need ASAP a code that will work - be it VBA or Python,
So I am in bog need for fast help - hence solo searching is not an option in this case.
So I have array with numeric codes:
VBA Code:
CompanyCodeArray = {123, 234, 345 ... 22ndCode}
Sheet1: Table1
Sheet2: Table2
Sheet3: Table3
Sheet4: Table4
Now each Table has a column named "CompanyCode"
Now what I need is for each company code I need 1 new workbook with 4 sheets, 1 sheet per Table1 thru 4 filtered to specific Company code.
The result: 22 files, with 4 sheets each 1 sheet per Table.
Now below is my python code, and for some reason it works on a test file, but does not work on the main file I wanted to work.
the issue with this code is that it is not finding the headers in the file i need to work on, but find correctly headers on a test file.
Python:
def process_excel_file(file_path, config):
print("\nReading Excel file:", file_path)
# Read all sheets from the Excel file
excel_file = pd.ExcelFile(file_path)
print("\nAvailable sheets in the file:", excel_file.sheet_names)
# Read all tables
tables = {}
try:
# First, get all table names from the Excel file
excel = win32com.client.Dispatch("Excel.Application")
excel.Visible = False
wb = excel.Workbooks.Open(file_path)
print("\n=== Diagnostic Information ===")
for sheet in wb.Sheets:
print(f"\nAnalyzing sheet: {sheet.Name}")
# Check for QueryTables (Power Query tables)
try:
query_tables = sheet.QueryTables
if query_tables.Count > 0:
print(f"Found {query_tables.Count} Power Query table(s) in sheet {sheet.Name}")
for qt in query_tables:
print(f" - Query Table: {qt.Name}, Connection: {qt.Connection}")
except Exception as e:
print(f" Note: Could not check QueryTables: {str(e)}")
# Check ListObjects (visible tables)
try:
list_objects = sheet.ListObjects
if list_objects.Count > 0:
print(f"Found {list_objects.Count} List Object(s) (visible tables) in sheet {sheet.Name}")
for list_object in list_objects:
print(f" - Table Name: {list_object.Name}")
print(f" - Visible: {list_object.Parent.Visible}")
print(f" - Range Address: {list_object.Range.Address}")
# Only process tables that are in our required list
if list_object.Name not in REQUIRED_TABLES:
print(f" - Skipping table {list_object.Name} (not in required tables list)")
continue
# Get table range address
table_range = list_object.Range.Address
# Convert Excel range address to zero-based indices
# Remove $ signs and split into start/end cells
table_range = table_range.replace('$', '')
start_cell, end_cell = table_range.split(':')
# Read the sheet and extract the table data
df = pd.read_excel(file_path, sheet_name=sheet.Name)
# Convert column letter to number (e.g., 'A' -> 0, 'B' -> 1)
def col2num(col):
num = 0
for c in col:
if c.isalpha():
num = num * 26 + (ord(c.upper()) - ord('A')) + 1
return num - 1
# Extract row and column indices
start_col = col2num(''.join(c for c in start_cell if c.isalpha()))
start_row = int(''.join(c for c in start_cell if c.isdigit())) - 1
end_col = col2num(''.join(c for c in end_cell if c.isalpha())) + 1
end_row = int(''.join(c for c in end_cell if c.isdigit()))
# Extract table data
table_df = df.iloc[start_row:end_row, start_col:end_col]
if 'CompanyCode' in table_df.columns:
tables[list_object.Name] = table_df
print(f"Successfully read table: {list_object.Name} with {len(table_df)} rows")
else:
print(f"Warning: Table {list_object.Name} doesn't have CompanyCode column")
except Exception as e:
print(f" Note: Could not check ListObjects: {str(e)}")
wb.Close(False) # False to not save changes
excel.Quit()
except Exception as e:
print(f"Error reading tables: {str(e)}")
return
if not tables:
print("Error: No valid tables found with CompanyCode column")
return
# Get unique company codes from all tables
all_company_codes = set()
for df in tables.values():
# Convert CompanyCode column to string type
df['CompanyCode'] = df['CompanyCode'].astype(str)
company_codes = df['CompanyCode'].unique()
print(f"Found company codes in table: {sorted(company_codes)}")
all_company_codes.update(company_codes)
# Sort company codes naturally (so "2" comes before "10")
sorted_codes = sorted(all_company_codes, key=lambda x: [int(s) if s.isdigit() else s.lower() for s in re.split(r'(\d+)', x)])
print(f"Found {len(all_company_codes)} unique company codes: {sorted_codes}")
# Create output directory
output_dir = os.path.join(os.path.dirname(file_path), 'split_by_company')
os.makedirs(output_dir, exist_ok=True)
# Clear old files if configured
if config['remove_old']:
clear_old_files(output_dir)
print(f"\nCreating files in: {output_dir}")
# Create separate files for each company code
for company_code in sorted_codes:
# Create base filename
base_filename = f'Flat_Reckon_For_Company_{company_code}'
# Add date if configured
if config['add_date']:
date_str = get_last_day_previous_month()
base_filename = f'{base_filename}_{date_str}'
# Add version number if not removing old files
if not config['remove_old']:
next_version = get_next_version_number(output_dir, base_filename)
if next_version == 1:
output_file = os.path.join(output_dir, f'{base_filename}.xlsx')
# If file already exists, increment version
if os.path.exists(output_file):
output_file = os.path.join(output_dir, f'{base_filename}_V_{next_version}.xlsx')
else:
output_file = os.path.join(output_dir, f'{base_filename}_V_{next_version}.xlsx')
else:
output_file = os.path.join(output_dir, f'{base_filename}.xlsx')
print(f"\nProcessing Company {company_code}...")
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
# Filter and save each table
for table_name, df in tables.items():
filtered_df = df[df['CompanyCode'] == company_code]
if len(filtered_df) > 0:
print(f"- Writing {len(filtered_df)} rows to sheet {table_name}")
filtered_df.to_excel(writer, sheet_name=table_name, index=False)
print(f"Created file: {output_file}")
For now I need ASAP a code that will work - be it VBA or Python,