Cache extracted data to avoid repeated calls to slow or rate-limited sources like APIs. This example demonstrates caching HTTP responses from the GitHub API, so subsequent runs use cached data instead of making new requests.
UNIFIED DATA PROCESSING FRAMEWORK
composer require flow-php/etl ~0.33.0
Extracts
Read from various data sources.
Transforms
Shape and optimize for your needs.
Loads
Store and secure in one of many available data sinks.
Examples:
Description
Documentation
Code
<?php
declare(strict_types=1);
use function Flow\ETL\DSL\{config_builder, data_frame, filesystem_cache, from_cache, ref, rename_replace, to_output};
use Flow\ETL\Adapter\Http\DynamicExtractor\NextRequestFactory;
use Flow\ETL\Adapter\Http\PsrHttpClientDynamicExtractor;
use Http\Client\Curl\Client;
use Nyholm\Psr7\Factory\Psr17Factory;
use Psr\Http\Message\{RequestInterface, ResponseInterface};
require __DIR__ . '/vendor/autoload.php';
// One PSR-17 factory instance is passed for both factory arguments of the cURL client.
$factory = new Psr17Factory();
$client = new Client($factory, $factory);

// Request factory for the extractor: it yields exactly one request
// (GET https://api.github.com/orgs/flow-php) and then reports that nothing is left.
$singleOrgRequest = new class implements NextRequestFactory {
    /**
     * Produce the next HTTP request, or null when there is none.
     *
     * @param null|ResponseInterface $previousResponse response to the previously created request;
     *                                                 null when no request has been made yet
     *
     * @return null|RequestInterface the GitHub organisation request on the first call, null afterwards
     */
    public function create(?ResponseInterface $previousResponse = null) : ?RequestInterface
    {
        // A previous response means the only request was already issued.
        // NOTE(review): returning null presumably tells the extractor to stop - confirm against the adapter.
        if ($previousResponse !== null) {
            return null;
        }

        $requestFactory = new Psr17Factory();

        $request = $requestFactory->createRequest('GET', 'https://api.github.com/orgs/flow-php');
        $request = $request->withHeader('Accept', 'application/vnd.github.v3+json');

        return $request->withHeader('User-Agent', 'flow-php/etl');
    }
};

$from_github_api = new PsrHttpClientDynamicExtractor($client, $singleOrgRequest);
// Flow configuration using a filesystem cache rooted at ./output/cache next to this script.
$config = config_builder()->cache(filesystem_cache(__DIR__ . '/output/cache'));

// Stage 1: read through the 'github_api' cache id, with the HTTP extractor as the fallback,
// and cache the rows under that same id.
$frame = data_frame($config)
    ->read(
        from_cache(
            id: 'github_api',
            fallback_extractor: $from_github_api,
        ),
    )
    ->cache('github_api');

// Stage 2: JSON-decode the `response_body` entry into `unpacked` and keep only that entry.
$frame = $frame
    ->withEntry('unpacked', ref('response_body')->jsonDecode())
    ->select('unpacked');

// Stage 3: unpack `unpacked`, replace the 'unpacked.' part of entry names with an empty string,
// then drop the `unpacked` entry itself.
$frame = $frame
    ->withEntry('unpacked', ref('unpacked')->unpack())
    ->renameEach(rename_replace('unpacked.', ''))
    ->drop('unpacked');

// Stage 4: keep the listed entries, write them to the output with truncation disabled, and run.
$frame
    ->select('name', 'html_url', 'blog', 'login', 'public_repos', 'followers', 'created_at')
    ->write(to_output(truncate: false))
    ->run();