flow php

UNIFIED DATA PROCESSING FRAMEWORK

composer require flow-php/etl ^0.7.4

Changelog

elephant
extract

Extracts

Read from various data sources.

arrow
transform

Transforms

Shape and optimize for your needs.

arrow
load

Loads

Store and secure in one of many available data sinks.

<?php

declare(strict_types=1);

use function Flow\ETL\DSL\{config_builder, data_frame, from_cache, ref, to_stream};
use Flow\ETL\Adapter\Http\DynamicExtractor\NextRequestFactory;
use Flow\ETL\Adapter\Http\PsrHttpClientDynamicExtractor;
use Flow\ETL\Cache\PSRSimpleCache;
use Http\Client\Curl\Client;
use Nyholm\Psr7\Factory\Psr17Factory;
use Psr\Http\Message\{RequestInterface, ResponseInterface};
use Symfony\Component\Cache\Adapter\FilesystemAdapter;
use Symfony\Component\Cache\Psr16Cache;

// Load the autoloader that lives three directories above this script.
require __DIR__ . '/../../../autoload.php';

// One PSR-17 factory instance is reused for both constructor arguments of the curl HTTP client.
$factory = new Psr17Factory();
$client = new Client($factory, $factory);

// Extractor driven by $client; the anonymous factory below decides which request comes next.
$from_github_api = new PsrHttpClientDynamicExtractor($client, new class implements NextRequestFactory {
    /**
     * Produces the request that should follow the given response.
     *
     * @param null|ResponseInterface $previousResponse response to the preceding request, null when none was made yet
     *
     * @return null|RequestInterface a GET request for the flow-php organization on the first call, null afterwards
     */
    public function create(?ResponseInterface $previousResponse = null) : ?RequestInterface
    {
        // Only a single request is ever issued: once a response exists there is nothing more to ask for.
        // NOTE(review): presumably a null return tells the extractor to stop - confirm against the adapter.
        if ($previousResponse !== null) {
            return null;
        }

        $request = (new Psr17Factory())->createRequest('GET', 'https://api.github.com/orgs/flow-php');

        return $request
            ->withHeader('Accept', 'application/vnd.github.v3+json')
            ->withHeader('User-Agent', 'flow-php/etl');
    }
});

// Filesystem-backed cache pool stored under ./output/cache next to this script.
$filesystemPool = new FilesystemAdapter(directory: __DIR__ . '/output/cache');

// Wrap the pool as a PSR-16 cache and hand it to the Flow cache implementation.
$adapter = new PSRSimpleCache(new Psr16Cache($filesystemPool));

// Build the data frame with a configuration that uses $adapter as its cache.
$flow = data_frame(config_builder()->cache($adapter));

// Read from the 'github_api' cache id, with the HTTP extractor as the fallback,
// and cache what was read under that same id.
$source = from_cache(
    id: 'github_api',
    fallback_extractor: $from_github_api
);
$responses = $flow
    ->read($source)
    ->cache('github_api');

// Decode the JSON response body, keep only that entry, unpack it, strip the
// 'unpacked.' prefix from the entry names and drop the original packed entry.
$organization = $responses
    ->withEntry('unpacked', ref('response_body')->jsonDecode())
    ->select('unpacked')
    ->withEntry('unpacked', ref('unpacked')->unpack())
    ->renameAll('unpacked.', '')
    ->drop('unpacked');

// Keep the columns of interest, write them to output.txt next to this script, then run.
$organization
    ->select('name', 'html_url', 'blog', 'login', 'public_repos', 'followers', 'created_at')
    ->write(to_stream(__DIR__ . '/output.txt', truncate: false))
    ->run();

Output

+----------+-----------------------------+---------------------+----------+--------------+-----------+----------------------+
|     name |                    html_url |                blog |    login | public_repos | followers |           created_at |
+----------+-----------------------------+---------------------+----------+--------------+-----------+----------------------+
| Flow PHP | https://github.com/flow-php | http://flow-php.com | flow-php |           30 |        89 | 2020-10-26T18:40:27Z |
+----------+-----------------------------+---------------------+----------+--------------+-----------+----------------------+
1 rows

Contributors

Join us on GitHub external resource