You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

671 lines
25 KiB

<?php
/*
* TransactionTransformer.php
* Copyright (c) 2021 james@firefly-iii.org
*
* This file is part of the Firefly III Data Importer
* (https://github.com/firefly-iii/data-importer).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
declare(strict_types=1);
namespace App\Services\SimpleFIN\Conversion;
use App\Services\CSV\Converter\Amount;
use App\Support\Http\CollectsAccounts;
use Carbon\Carbon;
use App\Services\Shared\Authentication\SecretManager;
use Illuminate\Support\Facades\Log;
use Exception;
// Removed SimpleFINModel imports as we now use arrays
/**
* Class TransactionTransformer
*/
class TransactionTransformer
{
use CollectsAccounts;
private array $expenseAccounts = [];
private array $revenueAccounts = [];
private bool $accountsCollected = false;
private array $pendingTransactionClusters = []; // For clustering similar transactions in clean instances
public function __construct()
{
bcscale(12);
}
/**
* Transform SimpleFIN transaction data (array) to Firefly III transaction format
*
* @param array $transactionData Raw transaction data from SimpleFIN JSON
* @param array $simpleFINAccountData Raw account data from SimpleFIN JSON for the account this transaction belongs to
* @param array $accountMapping Mapping configuration for Firefly III accounts
* @param array $newAccountConfig User-provided new account configuration data
*/
public function transform(array $transactionData, array $simpleFINAccountData, array $accountMapping = [], array $newAccountConfig = []): array
{
// Ensure amount is a float. SimpleFIN provides it as a string.
$amount = $transactionData['amount'] ?? '0.0';
// Skip zero-amount transactions as they're invalid for Firefly III
if (0 === bccomp('0', $amount)) {
Log::warning('Skipping zero-amount transaction', [
'transaction_id' => $transactionData['id'] ?? 'unknown',
'description' => $transactionData['description'] ?? 'unknown',
]);
return [];
}
$isDeposit = -1 === bccomp('0', $amount);
$absoluteAmount = Amount::positive($amount);
// Determine transaction type and accounts
if ($isDeposit) {
$type = 'deposit';
$sourceAccount = $this->getCounterAccount($transactionData, true);
$destinationAccount = $this->getFireflyAccount($simpleFINAccountData, $accountMapping, $newAccountConfig);
}
if (!$isDeposit) {
$type = 'withdrawal';
$sourceAccount = $this->getFireflyAccount($simpleFINAccountData, $accountMapping, $newAccountConfig);
$destinationAccount = $this->getCounterAccount($transactionData, false);
}
// Use 'posted' date as the primary transaction date.
// SimpleFIN 'posted' is a UNIX timestamp.
$transactionTimestamp = isset($transactionData['posted']) ? (int)$transactionData['posted'] : Carbon::now()->timestamp;
$transactionDateCarbon = Carbon::createFromTimestamp($transactionTimestamp);
return [
'type' => $type,
'date' => $transactionDateCarbon->format('Y-m-d'),
'amount' => $absoluteAmount,
'description' => $this->sanitizeDescription($transactionData['description'] ?? 'N/A'),
'source_id' => $sourceAccount['id'] ?? null,
'source_name' => $sourceAccount['name'] ?? null,
'source_iban' => $sourceAccount['iban'] ?? null,
'source_number' => $sourceAccount['number'] ?? null,
'source_bic' => $sourceAccount['bic'] ?? null,
'destination_id' => $destinationAccount['id'] ?? null,
'destination_name' => $destinationAccount['name'] ?? null,
'destination_iban' => $destinationAccount['iban'] ?? null,
'destination_number' => $destinationAccount['number'] ?? null,
'destination_bic' => $destinationAccount['bic'] ?? null,
'currency_code' => $this->getCurrencyCode($simpleFINAccountData),
'category_name' => $this->extractCategory($transactionData),
'reconciled' => false,
'notes' => $this->buildNotes($transactionData),
'tags' => $this->extractTags($transactionData),
'internal_reference' => $transactionData['id'] ?? null,
'external_id' => $this->buildExternalId($transactionData, $simpleFINAccountData),
'book_date' => $this->getBookDate($transactionData),
'process_date' => $this->getProcessDate($transactionData),
];
}
/**
* Get the Firefly III account information from mapping or account data
*/
private function getFireflyAccount(array $simpleFINAccountData, array $accountMapping, array $newAccountConfig = []): array
{
$accountKey = $simpleFINAccountData['id'] ?? null;
// Check for user-provided account name first, then fall back to SimpleFIN account name
$userProvidedName = null;
if ($accountKey && isset($newAccountConfig[$accountKey]['name'])) {
$userProvidedName = $newAccountConfig[$accountKey]['name'];
}
$accountName = $userProvidedName ?? $simpleFINAccountData['name'] ?? 'Unknown SimpleFIN Account';
// Check if account is mapped and has a valid (non-zero) Firefly III account ID
if ($accountKey && isset($accountMapping[$accountKey]) && $accountMapping[$accountKey] > 0) {
return [
'id' => $accountMapping[$accountKey], // Configuration maps SimpleFIN account ID directly to Firefly account ID
'name' => $accountName,
'iban' => null,
'number' => $accountKey,
'bic' => null,
];
}
// No mapping or mapped to 0 (deferred creation) - return null ID to trigger name-based account creation
return [
'id' => null,
'name' => $accountName,
'iban' => null,
'number' => $accountKey,
'bic' => null,
];
}
/**
* Get counter account (revenue/expense account) based on transaction data
*/
private function getCounterAccount(array $transactionData, bool $isDeposit): array
{
$description = $transactionData['description'] ?? 'N/A';
// Ensure accounts are collected
$this->ensureAccountsCollected();
// Try to find existing expense or revenue account first
$existingAccount = $this->findExistingAccount($description, $isDeposit);
if (null !== $existingAccount && [] !== $existingAccount) {
return [
'id' => $existingAccount['id'],
'name' => $existingAccount['name'],
'iban' => null,
'number' => null,
'bic' => null,
];
}
// For clean instances: try clustering when no existing accounts found
// This includes both clean instances (successful collection of zero accounts)
// and failed collection scenarios
if (config('simplefin.enable_transaction_clustering', true)) {
$accountsToCheck = $isDeposit ? $this->revenueAccounts : $this->expenseAccounts;
if (0 === count($accountsToCheck)) {
$clusteredAccountName = $this->findClusteredAccountName($description, $isDeposit);
if (null !== $clusteredAccountName && '' !== $clusteredAccountName && '0' !== $clusteredAccountName) {
return [
'id' => null,
'name' => $clusteredAccountName,
'iban' => null,
'number' => null,
'bic' => null,
];
}
}
}
// Fallback: extract meaningful counter account name from description
$counterAccountName = $this->extractCounterAccountName($description);
// Check if automatic account creation is enabled
if (!config('simplefin.auto_create_expense_accounts', true)) {
Log::warning(sprintf(
'Auto-creation disabled. No %s account will be created for "%s"',
$isDeposit ? 'revenue' : 'expense',
$description
));
return [
'id' => null,
'name' => $counterAccountName,
'iban' => null,
'number' => null,
'bic' => null,
];
}
Log::info(sprintf('Creating new %s account "%s" for transaction "%s"', $isDeposit ? 'revenue' : 'expense', $counterAccountName, $description));
return [
'id' => null,
'name' => $counterAccountName,
'iban' => null,
'number' => null,
'bic' => null,
];
}
/**
* Extract a meaningful counter account name from transaction description
*/
private function extractCounterAccountName(string $description): string
{
// Clean up and format the description for use as account name
$cleaned = trim($description);
// Remove common prefixes/suffixes that don't help identify the account
$patterns = [
'/^(PAYMENT|DEPOSIT|TRANSFER|DEBIT|CREDIT)\s+/i',
'/\s+(PAYMENT|DEPOSIT|TRANSFER|DEBIT|CREDIT)$/i',
'/^(FROM|TO)\s+/i',
'/\s+\d{4}[-\/]\d{2}[-\/]\d{2}.*$/', // Remove trailing dates
];
foreach ($patterns as $pattern) {
$cleaned = preg_replace($pattern, '', (string) $cleaned);
}
$cleaned = trim((string) $cleaned);
// If we end up with an empty string, use a generic name
if ('' === $cleaned) {
$cleaned = 'Unknown';
}
// Limit length to reasonable size
if (strlen($cleaned) > 100) {
return substr($cleaned, 0, 97).'...';
}
return $cleaned;
}
/**
* Get currency code, handling custom currencies
*/
private function getCurrencyCode(array $simpleFINAccountData): string
{
$currency = $simpleFINAccountData['currency'] ?? 'EUR'; // Default to EUR if not present
// Replicate basic logic from SimpleFINAccount::isCustomCurrency() if it checked for 'XXX' or non-standard codes.
// For now, pass through, or use a simple check. Let Firefly III handle currency validation.
// If currency code is not 3 uppercase letters, SimpleFIN spec might imply it's "custom".
// The previous code returned 'XXX' for custom.
if (3 === strlen($currency) && ctype_upper($currency)) {
return $currency;
}
return 'XXX'; // Default for non-standard or missing currency codes, matching previous behavior.
}
/**
* Extract category from transaction extra data
*/
private function extractCategory(array $transactionData): ?string
{
$extra = $transactionData['extra'] ?? null;
if (!is_array($extra)) {
return null;
}
// Check common category field names
$categoryFields = ['category', 'Category', 'CATEGORY', 'merchant_category', 'transaction_category'];
foreach ($categoryFields as $field) {
if (isset($extra[$field]) && '' !== (string) $extra[$field]) {
return (string) $extra[$field];
}
}
return null;
}
/**
* Extract tags from transaction extra data
*/
private function extractTags(array $transactionData): array
{
$tags = [];
if (isset($transactionData['pending']) && true === $transactionData['pending']) {
$tags[] = 'pending';
}
$extra = $transactionData['extra'] ?? null;
if (!is_array($extra)) {
// If no extra data, or not an array, return current tags (e.g. only 'pending' if applicable)
return array_unique($tags);
}
// Look for tags in extra data
if (isset($extra['tags']) && is_array($extra['tags'])) {
$tags = array_merge($tags, $extra['tags']);
}
// Add organization domain as tag if available
// Note: We don't have account info here, so this would need to be passed in
return array_unique($tags);
}
/**
* Build notes from transaction extra data
*/
private function buildNotes(array $transactionData): ?string
{
$notes = [];
$extra = $transactionData['extra'] ?? null;
if (isset($transactionData['pending']) && true === $transactionData['pending']) {
$notes[] = 'Transaction is pending';
}
if (is_array($extra)) {
// Add any extra fields that might be useful as notes
$noteFields = ['memo', 'notes', 'reference', 'check_number'];
foreach ($noteFields as $field) {
if (isset($extra[$field]) && '' !== (string) $extra[$field]) {
$notes[] = sprintf('- %s: %s', ucfirst($field), $extra[$field]);
}
}
}
return 0 === count($notes) ? null : implode("\n", $notes);
}
/**
* Build external ID for transaction
*/
private function buildExternalId(array $transactionData, array $simpleFINAccountData): string
{
return sprintf('ff3-%s-%s', $simpleFINAccountData['id'] ?? 'unknown_account', $transactionData['id'] ?? 'unknown_transaction');
}
/**
* Get book date from transaction data (using 'posted' timestamp)
*/
private function getBookDate(array $transactionData): ?string
{
if (isset($transactionData['posted']) && (int)$transactionData['posted'] > 0) {
return Carbon::createFromTimestamp((int)$transactionData['posted'])->format('Y-m-d');
}
return null;
}
/**
* Get process date from transaction data
* SimpleFIN JSON does not typically include a separate 'transacted_at'.
* This method will return null unless 'transacted_at' is explicitly in $transactionData.
*/
private function getProcessDate(array $transactionData): ?string
{
if (isset($transactionData['transacted_at']) && (int)$transactionData['transacted_at'] > 0) {
return Carbon::createFromTimestamp((int)$transactionData['transacted_at'])->format('Y-m-d');
}
return null;
}
/**
* Ensure expense and revenue accounts are collected from Firefly III
*/
private function ensureAccountsCollected(): void
{
if ($this->accountsCollected) {
return;
}
// Check if smart matching is enabled before attempting collection
if (!config('simplefin.smart_expense_matching', true)) {
Log::debug('Smart expense matching is disabled, skipping account collection');
$this->expenseAccounts = [];
$this->revenueAccounts = [];
$this->accountsCollected = true;
return;
}
try {
// Verify authentication context exists before making API calls
$baseUrl = SecretManager::getBaseUrl();
$accessToken = SecretManager::getAccessToken();
if ('' === $baseUrl || '' === $accessToken) {
Log::warning('Missing authentication context for account collection, skipping smart matching');
$this->expenseAccounts = [];
$this->revenueAccounts = [];
$this->accountsCollected = true;
return;
}
Log::debug('Collecting expense accounts from Firefly III');
$this->expenseAccounts = $this->collectExpenseAccounts();
Log::debug('Collecting revenue accounts from Firefly III');
$this->revenueAccounts = $this->collectRevenueAccounts();
Log::debug(sprintf(
'Collected %d expense accounts and %d revenue accounts',
count($this->expenseAccounts),
count($this->revenueAccounts)
));
$this->accountsCollected = true;
} catch (Exception $e) {
Log::error(sprintf('Failed to collect accounts: %s', $e->getMessage()));
Log::debug('Continuing without smart expense matching due to collection failure');
$this->expenseAccounts = [];
$this->revenueAccounts = [];
$this->accountsCollected = true; // Mark as collected to avoid repeated failures
}
}
/**
* Find existing expense or revenue account that matches the transaction description
*/
private function findExistingAccount(string $description, bool $isDeposit): ?array
{
$accountsToSearch = $isDeposit ? $this->revenueAccounts : $this->expenseAccounts;
$accountType = $isDeposit ? 'revenue' : 'expense';
if (0 === count($accountsToSearch)) {
Log::debug(sprintf('No %s accounts to search', $accountType));
return null;
}
// Normalize description for matching
$normalizedDescription = $this->normalizeForMatching($description);
// Try exact matches first
foreach ($accountsToSearch as $account) {
$normalizedAccountName = $this->normalizeForMatching($account['name']);
// Check for exact match
if ($normalizedAccountName === $normalizedDescription) {
Log::debug(sprintf('Exact match found: "%s" -> "%s"', $description, $account['name']));
return $account;
}
}
// Try fuzzy matching if no exact match found
$bestMatch = $this->findBestFuzzyMatch($normalizedDescription, $accountsToSearch);
if (null !== $bestMatch && [] !== $bestMatch) {
Log::debug(sprintf('Fuzzy match found: "%s" -> "%s" (similarity: %.2f)', $description, $bestMatch['account']['name'], $bestMatch['similarity']));
return $bestMatch['account'];
}
return null;
}
/**
* Normalize string for matching (lowercase, remove special chars, etc.)
*/
private function normalizeForMatching(string $text): string
{
// Convert to lowercase
$normalized = strtolower($text);
// Remove common transaction prefixes/suffixes
$patterns = [
'/^(payment|deposit|transfer|debit|credit)\s+/i',
'/\s+(payment|deposit|transfer|debit|credit)$/i',
'/^(from|to)\s+/i',
'/\s+\d{4}[-\/]\d{2}[-\/]\d{2}.*$/', // Remove trailing dates
'/\s+#\w+.*$/', // Remove trailing reference numbers
];
foreach ($patterns as $pattern) {
$normalized = preg_replace($pattern, '', (string) $normalized);
}
// Remove special characters and extra spaces
$normalized = preg_replace('/[^a-z0-9\s]/', '', (string) $normalized);
$normalized = preg_replace('/\s+/', ' ', (string) $normalized);
return trim((string) $normalized);
}
/**
* Find best fuzzy match using similarity algorithms
*/
private function findBestFuzzyMatch(string $normalizedDescription, array $accounts): ?array
{
// Check if smart matching is enabled
if (!config('simplefin.smart_expense_matching', true)) {
return null;
}
$bestMatch = null;
$bestSimilarity = 0;
$threshold = config('simplefin.expense_matching_threshold', 0.7);
foreach ($accounts as $account) {
$normalizedAccountName = $this->normalizeForMatching($account['name']);
// Calculate similarity using multiple algorithms
$similarity = $this->calculateSimilarity($normalizedDescription, $normalizedAccountName);
if ($similarity > $bestSimilarity && $similarity >= $threshold) {
$bestSimilarity = $similarity;
$bestMatch = [
'account' => $account,
'similarity' => $similarity,
];
}
}
return $bestMatch;
}
/**
* Calculate similarity between two strings using multiple algorithms
*/
private function calculateSimilarity(string $str1, string $str2): float
{
// Use Levenshtein distance for similarity
$maxLen = max(strlen($str1), strlen($str2));
if (0 === $maxLen) {
return 1.0;
}
$levenshtein = levenshtein($str1, $str2);
$levenshteinSimilarity = 1 - ($levenshtein / $maxLen);
// Use similar_text for additional comparison
similar_text($str1, $str2, $percent);
$similarTextSimilarity = $percent / 100;
// Check for substring matches (give bonus for contains)
$substringBonus = 0;
if (str_contains($str1, $str2) || str_contains($str2, $str1)) {
$substringBonus = 0.2;
}
// Weighted average of different similarity measures
$finalSimilarity = ($levenshteinSimilarity * 0.5) + ($similarTextSimilarity * 0.4) + $substringBonus;
return min(1.0, $finalSimilarity);
}
/**
* Sanitize description for safe display
*/
private function sanitizeDescription(string $description): string
{
// Remove any potentially harmful characters
$sanitized = strip_tags($description);
$sanitized = trim($sanitized);
// Ensure we have a non-empty description
if ('' === $sanitized) {
return 'SimpleFIN Transaction';
}
return $sanitized;
}
/**
* Find clustered account name for clean instances without existing accounts
*/
private function findClusteredAccountName(string $description, bool $isDeposit): ?string
{
$accountType = $isDeposit ? 'revenue' : 'expense';
$normalizedDescription = $this->normalizeForMatching($description);
$threshold = config('simplefin.clustering_similarity_threshold', 0.7);
// Check existing clusters for similar descriptions
foreach ($this->pendingTransactionClusters as $clusterName => $cluster) {
if ($cluster['type'] !== $accountType) {
continue;
}
// Check similarity against cluster representative
$similarity = $this->calculateSimilarity($normalizedDescription, $cluster['normalized_name']);
if ($similarity >= $threshold) {
Log::debug(sprintf(
'Clustering "%s" with existing cluster "%s" (similarity: %.2f)',
$description,
$clusterName,
$similarity
));
// Add to existing cluster
$this->pendingTransactionClusters[$clusterName]['descriptions'][] = $description;
++$this->pendingTransactionClusters[$clusterName]['count'];
return $clusterName;
}
}
// No matching cluster found, create new cluster
$clusterName = $this->generateClusterName($description);
$this->pendingTransactionClusters[$clusterName] = [
'type' => $accountType,
'normalized_name' => $normalizedDescription,
'descriptions' => [$description],
'count' => 1,
'created_at' => Carbon::now()->getTimestamp(),
];
Log::debug(sprintf('Created new %s cluster "%s" for "%s"', $accountType, $clusterName, $description));
return $clusterName;
}
/**
* Generate meaningful cluster name from transaction description
*/
private function generateClusterName(string $description): string
{
// Extract core business/merchant name for clustering
$cleaned = $this->extractCounterAccountName($description);
// Further normalize for cluster naming
$clusterName = preg_replace('/\b(payment|deposit|transfer|debit|credit|from|to)\b/i', '', $cleaned);
$clusterName = preg_replace('/\s+/', ' ', trim((string) $clusterName));
// Remove trailing numbers/references that could vary
$clusterName = preg_replace('/\s+\d+\s*$/', '', (string) $clusterName);
$clusterName = preg_replace('/\s+#\w+.*$/', '', (string) $clusterName);
// Ensure minimum meaningful length
if (strlen((string) $clusterName) < 3) {
$clusterName = $cleaned; // Fall back to basic cleaning
}
return trim((string) $clusterName);
}
}