DSL References
DSL stands for Domain Specific Language. In Flow, the DSL is a set of small functions that wrap object construction so pipelines read top-to-bottom. See the examples for usage in context.
SCHEMA
bool_schema(string $name, bool $nullable, ?Metadata $metadata) : BooleanDefinition datetime_schema(string $name, bool $nullable, ?Metadata $metadata) : DateTimeDefinition date_schema(string $name, bool $nullable, ?Metadata $metadata) : DateDefinition /**
* Create a Definition from an array representation.
*
* @param array<array-key, mixed> $definition
*
* @return Definition<mixed>
*/
definition_from_array(array $definition) : Definition /**
* Create a Definition from a Type.
*
* @param Type<mixed> $type
*
* @return Definition<mixed>
*/
definition_from_type(Reference|string $ref, Type $type, bool $nullable, ?Metadata $metadata) : Definition /**
* @template T of \UnitEnum
*
* @param class-string<T> $type
*
* @return EnumDefinition<T>
*/
enum_schema(string $name, string $type, bool $nullable, ?Metadata $metadata) : EnumDefinition float_schema(string $name, bool $nullable, ?Metadata $metadata) : FloatDefinition html_element_schema(string $name, bool $nullable, ?Metadata $metadata) : HTMLElementDefinition html_schema(string $name, bool $nullable, ?Metadata $metadata) : HTMLDefinition integer_schema(string $name, bool $nullable, ?Metadata $metadata) : IntegerDefinition /**
* Alias for `integer_schema`.
*/
int_schema(string $name, bool $nullable, ?Metadata $metadata) : IntegerDefinition json_schema(string $name, bool $nullable, ?Metadata $metadata) : JsonDefinition /**
* @template T
*
* @param ListType<T>|Type<list<T>> $type
*
* @return ListDefinition<T>
*/
list_schema(string $name, ListType|Type $type, bool $nullable, ?Metadata $metadata) : ListDefinition /**
* @template TKey of array-key
* @template TValue
*
* @param MapType<TKey, TValue>|Type<array<TKey, TValue>> $type
*
* @return MapDefinition<TKey, TValue>
*/
map_schema(string $name, MapType|Type $type, bool $nullable, ?Metadata $metadata) : MapDefinition null_schema(string $name, ?Metadata $metadata) : StringDefinition /**
* @param Definition<mixed> ...$definitions
*
* @return Schema
*/
schema(Definition $definitions) : Schema string_schema(string $name, bool $nullable, ?Metadata $metadata) : StringDefinition /**
* @template T
*
* @param StructureType<T>|Type<array<string, T>> $type
*
* @return StructureDefinition<T>
*/
structure_schema(string $name, StructureType|Type $type, bool $nullable, ?Metadata $metadata) : StructureDefinition /**
* Alias for `string_schema`.
*/
str_schema(string $name, bool $nullable, ?Metadata $metadata) : StringDefinition time_schema(string $name, bool $nullable, ?Metadata $metadata) : TimeDefinition uuid_schema(string $name, bool $nullable, ?Metadata $metadata) : UuidDefinition xml_element_schema(string $name, bool $nullable, ?Metadata $metadata) : XMLElementDefinition xml_schema(string $name, bool $nullable, ?Metadata $metadata) : XMLDefinition ENTRY
/**
* @return Entry<?bool>
*/
boolean_entry(string $name, ?bool $value, ?Metadata $metadata) : Entry /**
* @return Entry<?bool>
*/
bool_entry(string $name, ?bool $value, ?Metadata $metadata) : Entry /**
* @return Entry<?\DateTimeInterface>
*/
datetime_entry(string $name, DateTimeInterface|string|null $value, ?Metadata $metadata) : Entry /**
* @return Entry<?\DateTimeInterface>
*/
date_entry(string $name, DateTimeInterface|string|null $value, ?Metadata $metadata) : Entry /**
* @return Entry<?\UnitEnum>
*/
enum_entry(string $name, ?UnitEnum $enum, ?Metadata $metadata) : Entry /**
* @return Entry<?float>
*/
float_entry(string $name, string|int|float|null $value, ?Metadata $metadata) : Entry /**
* @return Entry<?HTMLElement>
*/
html_element_entry(string $name, Dom\HTMLElement|string|null $value, ?Metadata $metadata) : Entry /**
* @return Entry<?HTMLDocument>
*/
html_entry(string $name, Dom\HTMLDocument|string|null $value, ?Metadata $metadata) : Entry /**
* @return Entry<?int>
*/
integer_entry(string $name, ?int $value, ?Metadata $metadata) : Entry /**
* @return Entry<?int>
*/
int_entry(string $name, ?int $value, ?Metadata $metadata) : Entry /**
* @param null|array<array-key, mixed>|Json|string $data
*
* @return Entry<?Json>
*/
json_entry(string $name, Json|array|string|null $data, ?Metadata $metadata) : Entry /**
* @param null|array<array-key, mixed>|Json|string $data
*
* @throws InvalidArgumentException
*
* @return Entry<?Json>
*/
json_object_entry(string $name, Json|array|string|null $data, ?Metadata $metadata) : Entry /**
* @template T
*
* @param null|list<mixed> $value
* @param ListType<T> $type
*
* @return Entry<mixed>
*/
list_entry(string $name, ?array $value, ListType $type, ?Metadata $metadata) : Entry /**
* @template TKey of array-key
* @template TValue
*
* @param ?array<array-key, mixed> $value
* @param MapType<TKey, TValue> $mapType
*
* @return Entry<?array<TKey, TValue>>
*/
map_entry(string $name, ?array $value, MapType $mapType, ?Metadata $metadata) : Entry /**
* This function is an alias for creating a string entry from null.
* The main difference between using this function and simply calling str_entry with null as the second argument
* is that this function will also keep a note in the metadata that the type might not be final.
* For example, when we need to guess a column type from rows because a schema was not provided,
* and the given column in the first row is null, it might still change once we get to the second row.
* That metadata is used to determine whether string_entry was created from null or not.
*
* By design, Flow assumes when guessing a column type that null would be a string (the most flexible type).
*
* @return Entry<?string>
*/
null_entry(string $name, ?Metadata $metadata) : Entry /**
* @return Entry<?string>
*/
string_entry(string $name, ?string $value, ?Metadata $metadata) : Entry /**
* @template T
*
* @param ?array<string, mixed> $value
* @param StructureType<T> $type
*
* @return Entry<?array<string, T>>
*/
structure_entry(string $name, ?array $value, StructureType $type, ?Metadata $metadata) : Entry /**
* @template T
*
* @param ?array<string, mixed> $value
* @param StructureType<T> $type
*
* @return Entry<?array<string, T>>
*/
struct_entry(string $name, ?array $value, StructureType $type, ?Metadata $metadata) : Entry /**
* @return Entry<?string>
*/
str_entry(string $name, ?string $value, ?Metadata $metadata) : Entry /**
* @return Entry<?\DateInterval>
*/
time_entry(string $name, DateInterval|string|null $value, ?Metadata $metadata) : Entry /**
* @return Entry<?\Flow\Types\Value\Uuid>
*/
uuid_entry(string $name, Uuid|string|null $value, ?Metadata $metadata) : Entry /**
* @return Entry<?\DOMElement>
*/
xml_element_entry(string $name, DOMElement|string|null $value, ?Metadata $metadata) : Entry /**
* @return Entry<?\DOMDocument>
*/
xml_entry(string $name, DOMDocument|string|null $value, ?Metadata $metadata) : Entry DATA_FRAME
/**
* Alias for save_mode_append().
*/
append() : SaveMode /**
* @param array<array<mixed>>|array<mixed|string> $data
* @param array<Partition>|Partitions $partitions
* @param null|Schema $schema
*/
array_to_row(array $data, EntryFactory $entryFactory, Partitions|array $partitions, ?Schema $schema) : Row /**
* @param array<array<mixed>>|array<mixed|string> $data
* @param array<Partition>|Partitions $partitions
* @param null|Schema $schema
*/
array_to_rows(array $data, EntryFactory $entryFactory, Partitions|array $partitions, ?Schema $schema) : Rows /**
* An alias for `ref`.
*/
col(string $entry) : EntryReference compare_entries_by_name(Order $order) : Comparator compare_entries_by_name_desc() : Comparator /**
* @param array<class-string<Entry<mixed>>, int> $priorities
*/
compare_entries_by_type(array $priorities, Order $order) : Comparator /**
* @param array<class-string<Entry<mixed>>, int> $priorities
*/
compare_entries_by_type_and_name(array $priorities, Order $order) : Comparator /**
* @param array<class-string<Entry<mixed>>, int> $priorities
*/
compare_entries_by_type_desc(array $priorities) : Comparator config() : Config config_builder() : ConfigBuilder data_frame(Config|ConfigBuilder|null $config) : Flow /**
* Alias for data_frame() : Flow.
*/
df(Config|ConfigBuilder|null $config) : Flow /**
* @param Entry<mixed> ...$entries
*/
entries(Entry $entries) : Entries /**
* Alias for save_mode_exception_if_exists().
*/
exception_if_exists() : SaveMode execution_context(?Config $config) : FlowContext /**
* In this mode, functions return nulls instead of throwing exceptions.
*/
execution_lenient() : ExecutionMode /**
* In this mode, functions throw exceptions if the given entry is not found
* or passed parameters are invalid.
*/
execution_strict() : ExecutionMode filesystem_cache(Path|string|null $cache_dir, Filesystem $filesystem, Serializer $serializer) : FilesystemCache flow_context(?Config $config) : FlowContext generate_random_int(int $start, int $end, NativePHPRandomValueGenerator $generator) : int generate_random_string(int $length, NativePHPRandomValueGenerator $generator) : string /**
* Alias for save_mode_ignore().
*/
ignore() : SaveMode ignore_error_handler() : IgnoreError /**
* @param array<string|Type<mixed>>|Type<mixed> $type
* @param mixed $value
*/
is_type(Type|array $type, ?mixed $value) : bool /**
* @param array<Comparison|string>|Comparison $comparisons
*/
join_on(Comparison|array $comparisons, string $join_prefix) : Expression /**
* Alias for save_mode_overwrite().
*/
overwrite() : SaveMode print_rows(Rows $rows, int|bool $truncate, ?Formatter $formatter) : string random_string(ScalarFunction|int $length, RandomValueGenerator $generator) : RandomString /**
* @param Entry<mixed> ...$entry
*/
row(Entry $entry) : Row rows(Row $row) : Rows /**
* @param array<Row> $rows
* @param array<Partition|string>|Partitions $partitions
*/
rows_partitioned(array $rows, Partitions|array $partitions) : Rows row_number() : RowNumber save_mode_append() : SaveMode save_mode_exception_if_exists() : SaveMode save_mode_ignore() : SaveMode save_mode_overwrite() : SaveMode skip_rows_handler() : SkipRows throw_error_handler() : ThrowError /**
* @param array<mixed> $data
*
* @return Entry<mixed>
*/
to_entry(string $name, ?mixed $data, EntryFactory $entryFactory) : Entry window() : Window EXTRACTOR
/**
* @param null|int<1, max> $min_size
*/
batched_by(Extractor $extractor, Reference|string $column, ?int $min_size) : BatchByExtractor /**
* @param int<1, max> $size
*/
batches(Extractor $extractor, int $size) : BatchExtractor files(Path|string $directory) : FilesExtractor from_all(Extractor $extractors) : ChainExtractor /**
* @param iterable<array<mixed>> $array
* @param null|Schema $schema - @deprecated use withSchema() method instead
*/
from_array(iterable $array, ?Schema $schema) : ArrayExtractor /**
* @param string $id - cache id from which data will be extracted
* @param null|Extractor $fallback_extractor - extractor that will be used when cache is empty - @deprecated use withFallbackExtractor() method instead
* @param bool $clear - clear cache after extraction - @deprecated use withClearOnFinish() method instead
*/
from_cache(string $id, ?Extractor $fallback_extractor, bool $clear) : CacheExtractor from_data_frame(DataFrame $data_frame) : DataFrameExtractor from_memory(Memory $memory) : MemoryExtractor from_path_partitions(Path|string $path) : PathPartitionsExtractor from_pipeline(Pipeline $pipeline) : PipelineExtractor from_rows(Rows $rows) : RowsExtractor from_sequence_date_period(string $entry_name, DateTimeInterface $start, DateInterval $interval, DateTimeInterface $end, int $options) : SequenceExtractor from_sequence_date_period_recurrences(string $entry_name, DateTimeInterface $start, DateInterval $interval, int $recurrences, int $options) : SequenceExtractor from_sequence_number(string $entry_name, string|int|float $start, string|int|float $end, int|float $step) : SequenceExtractor TRANSFORMER
add_row_index(string $column, StartFrom $startFrom) : AddRowIndex /**
* @param int<1, max> $size
*/
batch_size(int $size) : BatchSize drop(Reference|string $entries) : Drop limit(?int $limit) : Limit /**
* @param array<int, string> $columns
*/
mask_columns(array $columns, string $mask) : MaskColumns /**
* @param array<string, string> $renames Map of old_name => new_name
*/
rename_map(array $renames) : RenameMapEntryStrategy /**
* @param array<string>|string $search
* @param array<string>|string $replace
*/
rename_replace(array|string $search, array|string $replace) : RenameReplaceEntryStrategy rename_style(StringStyles $style) : RenameCaseEntryStrategy select(Reference|string $entries) : Select AGGREGATING_FUNCTION
average(EntryReference|string $ref, int $scale, Rounding $rounding) : Average collect(EntryReference|string $ref) : Collect collect_unique(EntryReference|string $ref) : CollectUnique count(?EntryReference $function) : Count first(EntryReference|string $ref) : First last(EntryReference|string $ref) : Last max(EntryReference|string $ref) : Max min(EntryReference|string $ref) : Min string_agg(EntryReference|string $ref, string $separator, ?SortOrder $sort) : StringAggregate sum(EntryReference|string $ref) : Sum SCALAR_FUNCTION
all(ScalarFunction $functions) : All any(ScalarFunction $values) : Any /**
* @param array<array-key, mixed>|ScalarFunction $ref
*/
array_exists(ScalarFunction|array $ref, ScalarFunction|string $path) : ArrayPathExists /**
* Expands each value into an entry; if there is more than one value, multiple rows will be created.
* Array keys are ignored, only values are used to create new rows.
*
* Before:
* +--+-------------------+
* |id| array|
* +--+-------------------+
* | 1|{"a":1,"b":2,"c":3}|
* +--+-------------------+
*
* After:
* +--+--------+
* |id|expanded|
* +--+--------+
* | 1| 1|
* | 1| 2|
* | 1| 3|
* +--+--------+
*/
array_expand(ScalarFunction $function, ArrayExpand $expand) : ArrayExpand array_get(ScalarFunction $ref, ScalarFunction|string $path) : ArrayGet /**
* @param array<array-key, mixed>|ScalarFunction $keys
*/
array_get_collection(ScalarFunction $ref, ScalarFunction|array $keys) : ArrayGetCollection array_get_collection_first(ScalarFunction $ref, string $keys) : ArrayGetCollection array_keys_style_convert(ScalarFunction $ref, StringStyles|string $style) : ArrayKeysStyleConvert array_key_rename(ScalarFunction $ref, ScalarFunction|string $path, ScalarFunction|string $newName) : ArrayKeyRename /**
* @param array<array-key, mixed>|ScalarFunction $left
* @param array<array-key, mixed>|ScalarFunction $right
*/
array_merge(ScalarFunction|array $left, ScalarFunction|array $right) : ArrayMerge /**
* @param array<array-key, mixed>|ScalarFunction $array
*/
array_merge_collection(ScalarFunction|array $array) : ArrayMergeCollection /**
* @param array<array-key, mixed>|ScalarFunction $function
*/
array_reverse(ScalarFunction|array $function, ScalarFunction|bool $preserveKeys) : ArrayReverse array_sort(ScalarFunction $function, ScalarFunction|Sort|null $sort_function, ScalarFunction|int|null $flags, ScalarFunction|bool $recursive) : ArraySort /**
* @param array<array-key, mixed>|ScalarFunction $array
* @param array<array-key, mixed>|ScalarFunction $skip_keys
*/
array_unpack(ScalarFunction|array $array, ScalarFunction|array $skip_keys, ScalarFunction|string|null $entry_prefix) : ArrayUnpack between(?mixed $value, ?mixed $lower_bound, ?mixed $upper_bound, ScalarFunction|Boundary $boundary) : Between /**
* Calls a user-defined function with the given parameters.
*
* @param callable|ScalarFunction $callable
* @param array<mixed> $parameters
* @param null|Type<mixed> $return_type
*/
call(ScalarFunction|callable $callable, array $parameters, ?Type $return_type) : CallUserFunc capitalize(ScalarFunction|string $value) : Capitalize /**
* @param \Flow\Types\Type<mixed>|string $type
*/
cast(?mixed $value, Type|string $type) : Cast coalesce(ScalarFunction $values) : Coalesce /**
* @param array<array-key, mixed>|ScalarFunction $keys
* @param array<array-key, mixed>|ScalarFunction $values
*/
combine(ScalarFunction|array $keys, ScalarFunction|array $values) : Combine /**
* Concat all values. If you want to concatenate values with separator use concat_ws function.
*/
concat(ScalarFunction|string $functions) : Concat /**
* Concat all values with separator.
*/
concat_ws(ScalarFunction|string $separator, ScalarFunction|string $functions) : ConcatWithSeparator date_time_format(ScalarFunction $ref, string $format) : DateTimeFormat /**
* An alias for `ref`.
*/
entry(string $entry) : EntryReference exists(ScalarFunction $ref) : Exists greatest(?mixed $values) : Greatest hash(?mixed $value, Algorithm $algorithm) : Hash least(?mixed $values) : Least list_ref(string $entry) : ListFunctions lit(?mixed $value) : Literal lower(ScalarFunction|string $value) : ToLower /**
* @param array<MatchCondition> $cases
*/
match_cases(array $cases, ?mixed $default) : MatchCases match_condition(?mixed $condition, ?mixed $then) : MatchCondition not(ScalarFunction $value) : Not now(DateTimeZone|ScalarFunction $time_zone) : Now number_format(ScalarFunction|int|float $value, ScalarFunction|int $decimals, ScalarFunction|string $decimal_separator, ScalarFunction|string $thousands_separator) : NumberFormat optional(ScalarFunction $function) : Optional ref(string $entry) : EntryReference refs(Reference|string $entries) : References regex(ScalarFunction|string $pattern, ScalarFunction|string $subject, ScalarFunction|int $flags, ScalarFunction|int $offset) : Regex regex_all(ScalarFunction|string $pattern, ScalarFunction|string $subject, ScalarFunction|int $flags, ScalarFunction|int $offset) : RegexAll regex_match(ScalarFunction|string $pattern, ScalarFunction|string $subject, ScalarFunction|int $flags, ScalarFunction|int $offset) : RegexMatch regex_match_all(ScalarFunction|string $pattern, ScalarFunction|string $subject, ScalarFunction|int $flags, ScalarFunction|int $offset) : RegexMatchAll regex_replace(ScalarFunction|string $pattern, ScalarFunction|string $replacement, ScalarFunction|string $subject, ScalarFunction|int|null $limit) : RegexReplace round(ScalarFunction|int|float $value, ScalarFunction|int $precision, ScalarFunction|int $mode) : Round sanitize(ScalarFunction|string $value, ScalarFunction|string $placeholder, ScalarFunction|int|null $skipCharacters) : Sanitize size(?mixed $value) : Size split(ScalarFunction|string $value, ScalarFunction|string $separator, ScalarFunction|int $limit) : Split sprintf(ScalarFunction|string $format, ScalarFunction|string|int|float|null $args) : Sprintf structure_ref(string $entry) : StructureFunctions to_date(?mixed $ref, ScalarFunction|string $format, ScalarFunction|DateTimeZone $timeZone) : ToDate to_date_time(?mixed $ref, ScalarFunction|string $format, ScalarFunction|DateTimeZone $timeZone) : ToDateTime to_timezone(ScalarFunction|DateTimeInterface 
$value, ScalarFunction|DateTimeZone|string $timeZone) : ToTimeZone ulid(ScalarFunction|string|null $value) : Ulid upper(ScalarFunction|string $value) : ToUpper uuid_v4() : Uuid uuid_v7(ScalarFunction|DateTimeInterface|null $value) : Uuid when(?mixed $condition, ?mixed $then, ?mixed $else) : When WINDOW_FUNCTION
COMPARISON
compare_all(Comparison $comparisons) : All compare_any(Comparison $comparisons) : Any equal(Reference|string $left, Reference|string $right) : Equal identical(Reference|string $left, Reference|string $right) : Identical HELPER
analyze() : Analyze clock(string $time_zone) : ClockInterface constraint_sorted_by(Reference|string $column, Reference|string $columns) : SortedByConstraint constraint_unique(string $reference, string $references) : UniqueConstraint date_interval_to_microseconds(DateInterval $interval) : int date_interval_to_milliseconds(DateInterval $interval) : int date_interval_to_seconds(DateInterval $interval) : int delay_exponential(Duration $base, int $multiplier, ?Duration $max_delay) : Exponential delay_fixed(Duration $delay) : Fixed /**
* @param float $jitter_factor a value between 0 and 1 representing the maximum percentage of jitter to apply
*/
delay_jitter(DelayFactory $delay, float $jitter_factor) : Jitter delay_linear(Duration $delay, Duration $increment) : Linear duration_microseconds(int $microseconds) : Duration duration_milliseconds(int $milliseconds) : Duration duration_minutes(int $minutes) : Duration duration_seconds(int $seconds) : Duration retry_any_throwable(int $limit) : AnyThrowable /**
* @param array<class-string<\Throwable>> $exception_types
*/
retry_on_exception_types(array $exception_types, int $limit) : OnExceptionTypes schema_evolving_validator() : EvolvingValidator /**
* @return Schema
*/
schema_from_json(string $schema) : Schema /**
* @param array<string, array<bool|float|int|string>|bool|float|int|string> $metadata
*/
schema_metadata(array $metadata) : Metadata schema_selective_validator() : SelectiveValidator schema_strict_validator() : StrictValidator /**
* @param Schema $schema
*/
schema_to_ascii(Schema $schema, ?SchemaFormatter $formatter) : string /**
* @param Schema $schema
*/
schema_to_json(Schema $schema, bool $pretty) : string /**
* @param Schema $schema
*/
schema_to_php(Schema $schema, ValueFormatter $valueFormatter, TypeFormatter $typeFormatter) : string /**
* @param Schema $expected
* @param Schema $given
*/
schema_validate(Schema $expected, Schema $given, SchemaValidator $validator) : bool telemetry_options(bool $trace_loading, bool $trace_transformations, bool $trace_cache, bool $collect_metrics, ?FilesystemTelemetryOptions $filesystem) : TelemetryOptions with_entry(string $name, ScalarFunction $function) : WithEntry LOADER
/**
* Convert rows to an array and store them in passed array variable.
*
* @param array<array-key, mixed> $array
*
* @param-out array<array<mixed>> $array
*/
to_array(array $array) : ArrayLoader to_branch(ScalarFunction $condition, Loader $loader) : BranchingLoader to_callable(callable $callable) : CallbackLoader to_memory(Memory $memory) : MemoryLoader to_output(int|bool $truncate, Output $output, Formatter $formatter, SchemaFormatter $schemaFormatter) : StreamLoader to_stderr(int|bool $truncate, Output $output, Formatter $formatter, SchemaFormatter $schemaFormatter) : StreamLoader to_stdout(int|bool $truncate, Output $output, Formatter $formatter, SchemaFormatter $schemaFormatter) : StreamLoader to_stream(string $uri, int|bool $truncate, Output $output, string $mode, Formatter $formatter, SchemaFormatter $schemaFormatter) : StreamLoader to_transformation(Transformer|Transformation $transformer, Loader $loader) : TransformerLoader write_with_retries(Loader $loader, RetryStrategy $retry_strategy, DelayFactory $delay_factory, Sleep $sleep) : RetryLoader