CsvEncoder.php 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\Serializer\Encoder;
  11. use Symfony\Component\Serializer\Exception\InvalidArgumentException;
  12. use Symfony\Component\Serializer\Exception\UnexpectedValueException;
  13. /**
  14. * Encodes CSV data.
  15. *
  16. * @author Kévin Dunglas <dunglas@gmail.com>
  17. * @author Oliver Hoff <oliver@hofff.com>
  18. */
  19. class CsvEncoder implements EncoderInterface, DecoderInterface
  20. {
  /** Format name accepted by supportsEncoding() and supportsDecoding(). */
  public const FORMAT = 'csv';

  /** Context key: field delimiter handed to fputcsv()/fgetcsv(). */
  public const DELIMITER_KEY = 'csv_delimiter';

  /** Context key: field enclosure handed to fputcsv()/fgetcsv(). */
  public const ENCLOSURE_KEY = 'csv_enclosure';

  /** Context key: escape character handed to fputcsv()/fgetcsv(). */
  public const ESCAPE_CHAR_KEY = 'csv_escape_char';

  /** Context key: separator joining nested keys into a column name (and splitting it back on decode). */
  public const KEY_SEPARATOR_KEY = 'csv_key_separator';

  /** Context key: header names that must come first in the encoded output. */
  public const HEADERS_KEY = 'csv_headers';

  /** Context key: when truthy, cells starting with a formula character are prefixed on encode. */
  public const ESCAPE_FORMULAS_KEY = 'csv_escape_formulas';

  /** Context key: when truthy, decode() always returns the list of rows, even for a single row. */
  public const AS_COLLECTION_KEY = 'as_collection';

  /** Context key: when truthy, no header line is written on encode nor expected on decode. */
  public const NO_HEADERS_KEY = 'no_headers';

  /** Context key: when truthy, encode() prepends a UTF-8 byte order mark. */
  public const OUTPUT_UTF8_BOM_KEY = 'output_utf8_bom';

  private const UTF8_BOM = "\xEF\xBB\xBF";

  /**
   * First characters that make a cell eligible for formula escaping.
   *
   * Tab and carriage return are included because spreadsheet software may
   * also interpret cells starting with them as formulas (see the OWASP CSV
   * injection guidance).
   */
  private $formulasStartCharacters = ['=', '-', '+', '@', "\t", "\r"];

  /** Built-in option values; constructor arguments and per-call context override them. */
  private $defaultContext = [
      self::DELIMITER_KEY => ',',
      self::ENCLOSURE_KEY => '"',
      self::ESCAPE_CHAR_KEY => '',
      self::ESCAPE_FORMULAS_KEY => false,
      self::HEADERS_KEY => [],
      self::KEY_SEPARATOR_KEY => '.',
      self::NO_HEADERS_KEY => false,
      self::AS_COLLECTION_KEY => true,
      self::OUTPUT_UTF8_BOM_KEY => false,
  ];
  /**
   * @param array $defaultContext Option values merged on top of the built-in defaults
   */
  public function __construct(array $defaultContext = [])
  {
      $merged = array_merge($this->defaultContext, $defaultContext);

      // On PHP older than 7.4 an empty escape character is swapped for a backslash.
      $useBackslashEscape = \PHP_VERSION_ID < 70400 && '' === $merged[self::ESCAPE_CHAR_KEY];
      if ($useBackslashEscape) {
          $merged[self::ESCAPE_CHAR_KEY] = '\\';
      }

      $this->defaultContext = $merged;
  }
  /**
   * {@inheritdoc}
   *
   * Non-iterable data is written as a single one-cell row and empty data as a
   * single empty row. Iterable data whose keys are 0, 1, 2, ... and whose
   * values are all arrays is treated as a collection of rows; any other
   * iterable is treated as one row. Nested values are flattened into columns
   * named by their key path.
   *
   * @throws UnexpectedValueException when a UTF-8 BOM is requested but the generated CSV is not valid UTF-8
   */
  public function encode($data, string $format, array $context = [])
  {
      if (!is_iterable($data)) {
          $data = [[$data]];
      } elseif (empty($data)) {
          $data = [[]];
      } else {
          // Sequential arrays of arrays are considered as collections
          $i = 0;
          foreach ($data as $key => $value) {
              if ($i !== $key || !\is_array($value)) {
                  $data = [$data];
                  break;
              }
              ++$i;
          }
      }

      [$delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas, $outputBom] = $this->getCsvOptions($context);

      // Collect the flattened rows in a fresh list instead of iterating $data
      // by reference: $data may still be a Traversable here, and user-land
      // iterators (and generators not declared by-reference) reject
      // by-reference foreach with an Error. It also keeps the caller's
      // iterable untouched.
      $rows = [];
      foreach ($data as $value) {
          $flattened = [];
          $this->flatten($value, $flattened, $keySeparator, '', $escapeFormulas);
          $rows[] = $flattened;
      }

      // Requested headers come first, followed by every other column found in the rows.
      $headers = array_merge(array_values($headers), array_diff($this->extractHeaders($rows), $headers));

      $handle = fopen('php://temp', 'w+');

      if (!($context[self::NO_HEADERS_KEY] ?? $this->defaultContext[self::NO_HEADERS_KEY])) {
          fputcsv($handle, $headers, $delimiter, $enclosure, $escapeChar);
      }

      // Every row is laid over an all-empty template so that columns missing
      // from a row are written as empty cells in the right position.
      $emptyRow = array_fill_keys($headers, '');
      foreach ($rows as $row) {
          fputcsv($handle, array_replace($emptyRow, $row), $delimiter, $enclosure, $escapeChar);
      }

      rewind($handle);
      $csv = stream_get_contents($handle);
      fclose($handle);

      if ($outputBom) {
          // An empty pattern with the "u" modifier only matches valid UTF-8 subjects.
          if (!preg_match('//u', $csv)) {
              throw new UnexpectedValueException('You are trying to add a UTF-8 BOM to a non UTF-8 text.');
          }

          $csv = self::UTF8_BOM.$csv;
      }

      return $csv;
  }
  /**
   * {@inheritdoc}
   */
  public function supportsEncoding(string $format)
  {
      // Only the exact format name declared by this encoder is accepted.
      $isCsv = $format === self::FORMAT;

      return $isCsv;
  }
  /**
   * {@inheritdoc}
   *
   * Unless the "no headers" option is set, the first line provides the column
   * names; each name is split on the key separator and rebuilt as nested
   * array keys. Cells beyond the number of header columns are ignored.
   */
  public function decode(string $data, string $format, array $context = [])
  {
      $stream = fopen('php://temp', 'r+');
      fwrite($stream, $data);
      rewind($stream);

      // Start reading after a leading UTF-8 byte order mark, if there is one.
      if (0 === strpos($data, self::UTF8_BOM)) {
          fseek($stream, \strlen(self::UTF8_BOM));
      }

      [$delimiter, $enclosure, $escapeChar, $keySeparator, , , , $asCollection] = $this->getCsvOptions($context);
      $withoutHeaderLine = $context[self::NO_HEADERS_KEY] ?? $this->defaultContext[self::NO_HEADERS_KEY];

      $paths = null;   // one key path (list of segments) per column, set from the first line
      $depths = [];    // number of segments of each key path
      $pathCount = 0;
      $rows = [];

      while (true) {
          $cells = fgetcsv($stream, 0, $delimiter, $enclosure, $escapeChar);
          if (false === $cells) {
              break;
          }

          $cellCount = \count($cells);

          if (null === $paths) {
              $pathCount = $cellCount;

              if (!$withoutHeaderLine) {
                  // The first line holds the column names and is not a data row.
                  foreach ($cells as $cell) {
                      $segments = explode($keySeparator, $cell);
                      $paths[] = $segments;
                      $depths[] = \count($segments);
                  }

                  continue;
              }

              // No header line: columns are keyed by their position and the
              // first line is processed as data below.
              for ($i = 0; $i < $cellCount; ++$i) {
                  $paths[] = [$i];
                  $depths[] = 1;
              }
          }

          $item = [];
          $usable = min($cellCount, $pathCount);

          for ($i = 0; $i < $usable; ++$i) {
              $segments = $paths[$i];
              $last = $depths[$i] - 1;
              $cursor = &$item;

              // Handle nested arrays: walk (and create) every intermediate level
              for ($j = 0; $j < $last; ++$j) {
                  $segment = $segments[$j];
                  if (!isset($cursor[$segment])) {
                      $cursor[$segment] = [];
                  }

                  $cursor = &$cursor[$segment];
              }

              $cursor[$segments[$last]] = $cells[$i];
          }

          $rows[] = $item;
      }

      fclose($stream);

      // If there is only one data line in the document and a collection was
      // not requested, return the line itself instead of a list of lines.
      $isSingleRow = !empty($rows) && !isset($rows[1]);

      return (!$asCollection && $isSingleRow) ? $rows[0] : $rows;
  }
  /**
   * {@inheritdoc}
   */
  public function supportsDecoding(string $format)
  {
      // Only the exact format name declared by this encoder is accepted.
      $isCsv = $format === self::FORMAT;

      return $isCsv;
  }
  /**
   * Flattens an array and generates keys including the path.
   *
   * Iterable values are descended into recursively, their keys being joined
   * to the parent path with the key separator. Booleans are stored as 1 / 0.
   * When formula escaping is enabled, values starting with one of the formula
   * characters are prefixed with a single quote.
   *
   * @param iterable $array          Values to flatten
   * @param array    $result         Receives the flattened "path => value" pairs
   * @param string   $keySeparator   Glue placed between nested keys
   * @param string   $parentKey      Path prefix of the current nesting level
   * @param bool     $escapeFormulas Whether formula-looking values must be prefixed
   */
  private function flatten(iterable $array, array &$result, string $keySeparator, string $parentKey = '', bool $escapeFormulas = false)
  {
      foreach ($array as $key => $value) {
          $path = $parentKey.$key;

          if (is_iterable($value)) {
              $this->flatten($value, $result, $keySeparator, $path.$keySeparator, $escapeFormulas);

              continue;
          }

          if ($escapeFormulas && \in_array(substr((string) $value, 0, 1), $this->formulasStartCharacters, true)) {
              // Prefix with a single quote rather than a tab: the tab character
              // is itself listed by OWASP as a formula trigger, so it does not
              // reliably neutralise the cell.
              $result[$path] = "'".$value;

              continue;
          }

          // Ensures an actual value is used when dealing with true and false
          $result[$path] = false === $value ? 0 : (true === $value ? 1 : $value);
      }
  }
  /**
   * Resolves every CSV option from the call context, falling back to the
   * default context when a key is absent or null.
   *
   * @return array In order: delimiter, enclosure, escape char, key separator,
   *               headers, escape formulas flag, output BOM flag, as-collection flag
   *
   * @throws InvalidArgumentException when the resolved headers option is not an array
   */
  private function getCsvOptions(array $context): array
  {
      $resolve = function (string $key) use ($context) {
          return $context[$key] ?? $this->defaultContext[$key];
      };

      $headers = $resolve(self::HEADERS_KEY);
      if (!\is_array($headers)) {
          throw new InvalidArgumentException(sprintf('The "%s" context variable must be an array or null, given "%s".', self::HEADERS_KEY, get_debug_type($headers)));
      }

      return [
          $resolve(self::DELIMITER_KEY),
          $resolve(self::ENCLOSURE_KEY),
          $resolve(self::ESCAPE_CHAR_KEY),
          $resolve(self::KEY_SEPARATOR_KEY),
          $headers,
          $resolve(self::ESCAPE_FORMULAS_KEY),
          $resolve(self::OUTPUT_UTF8_BOM_KEY),
          $resolve(self::AS_COLLECTION_KEY),
      ];
  }
  /**
   * Builds the ordered list of column names found across all rows.
   *
   * A name that has not been seen before is inserted right after the name
   * visited just before it in the same row; when it is the first name visited
   * in its row, it is appended at the end of the list.
   *
   * @return string[]
   */
  private function extractHeaders(iterable $data): array
  {
      $ordered = [];    // position => column name
      $positions = [];  // column name => position

      foreach ($data as $row) {
          $anchor = null;

          foreach ($row as $name => $_) {
              if (!isset($positions[$name])) {
                  $slot = \count($ordered);

                  if (null !== $anchor) {
                      $slot = $positions[$anchor] + 1;

                      // Shift every name located at or after the slot one
                      // position to the right, keeping the lookup in sync.
                      for ($j = \count($ordered); $j > $slot; --$j) {
                          $shifted = $ordered[$j - 1];
                          $ordered[$j] = $shifted;
                          ++$positions[$shifted];
                      }
                  }

                  $ordered[$slot] = $name;
                  $positions[$name] = $slot;
              }

              $anchor = $name;
          }
      }

      return $ordered;
  }
  236. }