You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
226 lines
6.5 KiB
226 lines
6.5 KiB
<?php
|
|
/*
 * CSVFileProcessor.php
 * Copyright (c) 2021 james@firefly-iii.org
 *
 * This file is part of the Firefly III Data Importer
 * (https://github.com/firefly-iii/data-importer).
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
|
|
|
|
declare(strict_types=1);
|
|
namespace App\Services\Import\Routine;
|
|
|
|
use App\Exceptions\ImporterErrorException;
|
|
use App\Services\CSV\Configuration\Configuration;
|
|
use App\Services\CSV\Specifics\SpecificService;
|
|
use App\Services\Import\Support\ProgressInformation;
|
|
use JsonException;
|
|
use League\Csv\Exception;
|
|
use League\Csv\Reader;
|
|
use League\Csv\ResultSet;
|
|
use League\Csv\Statement;
|
|
use Log;
|
|
|
|
/**
 * Class CSVFileProcessor
 *
 * Reads the records of a CSV file through a League\Csv reader, cleans up
 * every line, runs the configured "specifics" over it and, when the
 * configuration asks for it, drops duplicate lines.
 *
 * The reader, delimiter and header flag are supplied through setters and
 * must be set before processCSVFile() is called. Problems are recorded with
 * addError()/addWarning() (presumably provided by the ProgressInformation
 * trait; verify) rather than thrown to the caller of processCSVFile().
 */
class CSVFileProcessor
{
    use ProgressInformation;

    private bool          $hasHeaders;
    private Reader        $reader;
    private array         $specifics;
    private string        $delimiter;
    private Configuration $configuration;

    /**
     * CSVFileProcessor constructor.
     *
     * Starts with an empty list of specifics; use setSpecifics() to replace it.
     *
     * @param Configuration $configuration
     */
    public function __construct(Configuration $configuration)
    {
        $this->specifics     = [];
        $this->configuration = $configuration;
    }

    /**
     * Translate a delimiter name ('tab', 'semicolon' or 'comma') into the
     * actual delimiter character. Any other name falls back to a comma.
     *
     * @param string $delimiter
     */
    public function setDelimiter(string $delimiter): void
    {
        $map = [
            'tab'       => "\t",
            'semicolon' => ';',
            'comma'     => ',',
        ];

        $this->delimiter = $map[$delimiter] ?? ',';
    }

    /**
     * Get a reader, and start looping over each line.
     *
     * Applies the delimiter to the reader, skips the first record when the
     * file has headers and hands the remaining records to processCSVLines().
     * When any step fails, the error is logged and recorded and an empty
     * array is returned.
     *
     * @return array The processed lines, or an empty array on failure.
     */
    public function processCSVFile(): array
    {
        Log::debug('Now in startImportLoop()');
        $offset = $this->hasHeaders ? 1 : 0;
        try {
            $this->reader->setDelimiter($this->delimiter);
        } catch (Exception $e) {
            return $this->logAndRecordError('Could not set delimiter: %s', $e);
        }
        Log::debug(sprintf('Offset is %d', $offset));
        try {
            $stmt    = (new Statement)->offset($offset);
            $records = $stmt->process($this->reader);
        } catch (Exception $e) {
            return $this->logAndRecordError('Could not read CSV: %s', $e);
        }

        try {
            return $this->processCSVLines($records);
        } catch (ImporterErrorException $e) {
            return $this->logAndRecordError('Could not parse CSV: %s', $e);
        }
    }

    /**
     * Log an exception (message and stack trace), record a formatted error
     * under index 0 and return the empty result processCSVFile() hands back
     * on failure.
     *
     * @param string     $format sprintf() format with a single %s for the exception message.
     * @param \Throwable $e
     *
     * @return array Always an empty array.
     */
    private function logAndRecordError(string $format, \Throwable $e): array
    {
        Log::error($e->getMessage());
        Log::error($e->getTraceAsString());
        $this->addError(0, sprintf($format, $e->getMessage()));

        return [];
    }

    /**
     * Loop all records from CSV file.
     *
     * Each record is sanitized and passed through the configured specifics.
     * Duplicate lines are removed afterwards when the configuration says so.
     *
     * @param ResultSet $records
     *
     * @return array
     * @throws ImporterErrorException
     */
    private function processCSVLines(ResultSet $records): array
    {
        $updatedRecords = [];
        $count          = $records->count();
        Log::info(sprintf('Now in %s with %d records', __METHOD__, $count));
        $currentIndex = 1;
        foreach ($records as $line) {
            $line = $this->sanitize($line);
            Log::debug(sprintf('Parsing line %d/%d', $currentIndex, $count));
            $line             = SpecificService::runSpecifics($line, $this->specifics);
            $updatedRecords[] = $line;
            $currentIndex++;
        }
        Log::info(sprintf('Parsed all %d lines.', $count));

        // exclude double lines.
        if ($this->configuration->isIgnoreDuplicateLines()) {
            Log::info('Going to remove duplicate lines.');
            $updatedRecords = $this->removeDuplicateLines($updatedRecords);
        }

        return $updatedRecords;
    }

    /**
     * Do a first sanity check on whatever comes out of the CSV file.
     *
     * Re-indexes the line numerically, casts every value to a string and
     * trims surrounding whitespace.
     *
     * @param array $line
     *
     * @return array
     */
    private function sanitize(array $line): array
    {
        // array_map() is used on purpose: array_walk() discards the callback's
        // return value, so the cleaned-up values would never reach the result.
        return array_map(
            static function ($element): string {
                // NOTE(review): the search and replacement literals look identical
                // here; the first one was presumably a non-breaking space. Confirm
                // against the original file.
                return trim(str_replace(' ', ' ', (string) $element));
            },
            array_values($line)
        );
    }

    /**
     * Remove lines that occur more than once, keeping the first occurrence.
     *
     * Lines are compared by the SHA-256 hash of their JSON encoding. Every
     * skipped line is logged and recorded as a warning under its index.
     *
     * @param array $array
     *
     * @return array
     * @throws ImporterErrorException When a line cannot be JSON encoded.
     */
    private function removeDuplicateLines(array $array): array
    {
        // Hashes are stored as array keys so the membership test is a single
        // isset() instead of a linear scan for every line.
        $seen   = [];
        $return = [];
        foreach ($array as $index => $line) {
            try {
                $hash = hash('sha256', json_encode($line, JSON_THROW_ON_ERROR));
            } catch (JsonException $e) {
                Log::error($e->getMessage());
                Log::error($e->getTraceAsString());
                throw new ImporterErrorException(sprintf('Could not decode JSON line #%d: %s', $index, $e->getMessage()));
            }
            if (isset($seen[$hash])) {
                $message = sprintf('Going to skip line #%d because it\'s in the file twice. This may reset the count below.', $index);
                Log::warning($message);
                $this->addWarning($index, $message);
                continue;
            }
            $seen[$hash] = true;
            $return[]    = $line;
        }
        Log::info(sprintf('Went from %d line(s) to %d line(s)', count($array), count($return)));

        return $return;
    }

    /**
     * Tell the processor whether the first record of the file is a header
     * row that must be skipped.
     *
     * @param bool $hasHeaders
     */
    public function setHasHeaders(bool $hasHeaders): void
    {
        $this->hasHeaders = $hasHeaders;
    }

    /**
     * Set the CSV reader the records are read from.
     *
     * @param Reader $reader
     */
    public function setReader(Reader $reader): void
    {
        $this->reader = $reader;
    }

    /**
     * Set the specifics that are passed to SpecificService::runSpecifics()
     * for every line.
     *
     * @param array $specifics
     */
    public function setSpecifics(array $specifics): void
    {
        $this->specifics = $specifics;
    }
}
|