Concat
Function to concatenate two strings.
Parameters
item: The string to be concatenated with the base string.
Returns
Returns an expression object denoting the result of the concat
expression.
The resulting expression is of type str
or Optional[str]
depending on
either of input/item being nullable.
Errors
The str
namespace must be invoked on an expression that evaluates to string
or optional of string. Similarly, item
must evaluate to either a string or an
optional of string.
1from fennel.expr import col
2
3expr = col("x").str.concat(col("y"))
4
5assert expr.typeof(schema={"x": str, "y": str}) == str
6assert expr.typeof(schema={"x": str, "y": Optional[str]}) == Optional[str]
7assert expr.typeof(schema={"x": Optional[str], "y": str}) == Optional[str]
8assert (
9 expr.typeof(schema={"x": Optional[str], "y": Optional[str]})
10 == Optional[str]
11)
12
13# can be evaluated with a dataframe
14df = pd.DataFrame(
15 {
16 "x": ["hello", "world", "some", None],
17 "y": [" world", " hello", None, None],
18 }
19)
20schema = {"x": Optional[str], "y": Optional[str]}
21assert expr.eval(df, schema=schema).tolist() == [
22 "hello world",
23 "world hello",
24 pd.NA,
25 pd.NA,
26]
27
28# schema of both columns must be str
29with pytest.raises(ValueError):
30 expr.typeof(schema={"x": str})
31
32with pytest.raises(Exception):
33 expr.typeof(schema={"x": str, "y": int})
python
Contains
Function to check if the given string contains another string.
Parameters
contains
checks if the base string contains the expression item
or not.
Returns
Returns an expression object denoting the result of the contains
expression.
The resulting expression is of type bool
or Optional[bool]
depending on
either of input/item being nullable.
Errors
The str
namespace must be invoked on an expression that evaluates to string
or optional of string. Similarly, item
must evaluate to either a string or an
optional of string.
1from fennel.expr import col
2
3expr = col("x").str.contains(col("y"))
4
5assert expr.typeof(schema={"x": str, "y": str}) == bool
6assert expr.typeof(schema={"x": str, "y": Optional[str]}) == Optional[bool]
7assert expr.typeof(schema={"x": Optional[str], "y": str}) == Optional[bool]
8assert (
9 expr.typeof(schema={"x": Optional[str], "y": Optional[str]})
10 == Optional[bool]
11)
12
13# can be evaluated with a dataframe
14df = pd.DataFrame(
15 {
16 "x": ["hello", "world", "some", None],
17 "y": ["ell", "random", None, None],
18 }
19)
20schema = {"x": Optional[str], "y": Optional[str]}
21assert expr.eval(df, schema=schema).tolist() == [True, False, pd.NA, pd.NA]
22
23# schema of both columns must be str
24with pytest.raises(ValueError):
25 expr.typeof(schema={"x": str})
26
27with pytest.raises(Exception):
28 expr.typeof(schema={"x": str, "y": int})
python
Ends With
Function to check if the given string ends with another string.
Parameters
endswith
checks if the input string ends with the expression item
.
Returns
Returns an expression object denoting the result of the endswith
expression.
The resulting expression is of type bool
or Optional[bool]
depending on
either of input/item being nullable.
Errors
The str
namespace must be invoked on an expression that evaluates to string
or optional of string. Similarly, item
must evaluate to either a string or an
optional of string.
1from fennel.expr import col
2
3expr = col("x").str.endswith(col("y"))
4
5assert expr.typeof(schema={"x": str, "y": str}) == bool
6assert expr.typeof(schema={"x": str, "y": Optional[str]}) == Optional[bool]
7assert expr.typeof(schema={"x": Optional[str], "y": str}) == Optional[bool]
8assert (
9 expr.typeof(schema={"x": Optional[str], "y": Optional[str]})
10 == Optional[bool]
11)
12
13# can be evaluated with a dataframe
14df = pd.DataFrame(
15 {
16 "x": ["hello", "world", "some", None],
17 "y": ["lo", "wor", None, None],
18 }
19)
20schema = {"x": Optional[str], "y": Optional[str]}
21assert expr.eval(df, schema=schema).tolist() == [True, False, pd.NA, pd.NA]
22
23# schema of both columns must be str
24with pytest.raises(ValueError):
25 expr.typeof(schema={"x": str})
26
27with pytest.raises(Exception):
28 expr.typeof(schema={"x": str, "y": int})
python
Json Extract
Function to extract a value from a json encoded string using a json path.
Parameters
The json path to use when extracting the value from the json encoded string. See this page for more details on json path syntax. The extracted value is always returned as a string or None if the path is not valid/found.
Returns
Returns an expression object denoting the result of the json_extract
expression.
The resulting expression is of type Optional[str]
and more specifically is None
when the base string is None or the path is not found in the json encoded string.
Errors
The str
namespace must be invoked on an expression that evaluates to string
or optional of string.
1from fennel.expr import col
2
3expr = col("s").str.json_extract("$.x.y")
4
5# return type is always Optional[str]
6assert expr.typeof(schema={"s": str}) == Optional[str]
7assert expr.typeof(schema={"s": Optional[str]}) == Optional[str]
8
9# can be evaluated with a dataframe
10df = pd.DataFrame(
11 {"s": ['{"x": {"y": "hello"}}', '{"x": {"y": 1}}', "{}", None]}
12)
13schema = {"s": Optional[str]}
14# NOTE that the integer value 1 is returned as a string and not an int
15# also invalid paths (e.g. "$.x.y" in case 3 of "{}") return null
16assert expr.eval(df, schema).tolist() == ["hello", "1", pd.NA, pd.NA]
python
Len
Function to get the length of a string.
Returns
Returns an expression object denoting the result of the len
function.
The resulting expression is of type int
or Optional[int]
depending on
input being nullable.
Errors
The str
namespace must be invoked on an expression that evaluates to string
or optional of string.
1from fennel.expr import col
2
3expr = col("x").str.len()
4
5assert expr.typeof(schema={"x": str}) == int
6assert expr.typeof(schema={"x": Optional[str]}) == Optional[int]
7
8# can be evaluated with a dataframe
9df = pd.DataFrame({"x": ["hello", "world", "some", None]})
10schema = {"x": Optional[str]}
11assert expr.eval(df, schema=schema).tolist() == [5, 5, 4, pd.NA]
12
13# schema of column must be str
14with pytest.raises(ValueError):
15 expr.typeof(schema={"x": int})
python
Lower
Function to convert a string to all lowercase letters.
Returns
Returns an expression object denoting the result of the lower
function.
The resulting expression is of type str
or Optional[str]
depending on
input being nullable.
Errors
The str
namespace must be invoked on an expression that evaluates to string
or optional of string.
1from fennel.expr import col
2
3expr = col("x").str.lower()
4
5assert expr.typeof(schema={"x": str}) == str
6assert expr.typeof(schema={"x": Optional[str]}) == Optional[str]
7
8# can be evaluated with a dataframe
9df = pd.DataFrame({"x": ["HeLLo", "World", "some", None]})
10schema = {"x": Optional[str]}
11assert expr.eval(df, schema=schema).tolist() == [
12 "hello",
13 "world",
14 "some",
15 pd.NA,
16]
17
18# schema of column must be str
19with pytest.raises(ValueError):
20 expr.typeof(schema={"x": int})
python
Parse
Function to parse an object of the given type out of a string that represents json encoded data.
Parameters
The type of the data that should be parsed from the JSON-encoded string.
Returns
Returns an expression object denoting the result of the parse
expression.
The resulting expression is of type dtype
or Optional[dtype]
depending on
the base string being nullable.
A type can only be parsed out of valid json representation of that type. For
instance, a str
can not be parsed out of "hi"
because the correct json
representation of the string is "\"hi\""
.
Errors
The str
namespace must be invoked on an expression that evaluates to string
or optional of string.
If the given string can not be parsed into an object of the given type, a runtime error is raised.
1from fennel.expr import col, lit
2
3expr = col("x").str.parse(list[int])
4
5assert expr.typeof(schema={"x": str}) == List[int]
6assert expr.typeof(schema={"x": Optional[str]}) == Optional[List[int]]
7
8# can be evaluated with a dataframe
9df = pd.DataFrame({"x": ["[1, 2, 3]", "[4, 5]", None]})
10schema = {"x": Optional[str]}
11assert expr.eval(df, schema=schema).tolist() == [[1, 2, 3], [4, 5], pd.NA]
12
13# schema of column must be str
14with pytest.raises(ValueError):
15 expr.typeof(schema={"x": int})
16
17# can use this to parse several common types
18df = pd.DataFrame({"x": ["1"]})
19schema = {"x": str}
20cases = [
21 ("1", int, 1),
22 ("1.1", float, 1.1),
23 ("true", bool, True),
24 ("false", bool, False),
25 ('"hi"', str, "hi"),
26]
27for case in cases:
28 expr = lit(case[0]).str.parse(case[1])
29 assert expr.eval(df, schema).tolist() == [case[2]]
python
1from fennel.expr import col, lit
2
3invalids = [
4 ("False", bool), # "False" is not valid json, "false" is
5 ("hi", str), # "hi" is not valid json, "\"hi\"" is
6 ("[1, 2, 3", List[int]),
7 ("1.1.1", float),
8]
9for invalid in invalids:
10 expr = lit(invalid[0]).str.parse(invalid[1])
11 df = pd.DataFrame({"x": ["1"]})
12 schema = {"x": str}
13 with pytest.raises(Exception):
14 expr.eval(df, schema)
python
1from fennel.expr import col, lit
2from fennel.dtypes import struct
3
4@struct
5class MyStruct:
6 x: int
7 y: Optional[bool]
8
9cases = [
10 ('{"x": 1, "y": true}', MyStruct(1, True)),
11 ('{"x": 2, "y": null}', MyStruct(2, None)),
12 ('{"x": 3}', MyStruct(3, None)),
13]
14for case in cases:
15 expr = lit(case[0]).str.parse(MyStruct)
16 df = pd.DataFrame({"x": ["1"]})
17 schema = {"x": str}
18 found = expr.eval(df, schema).tolist()
19 assert len(found) == 1
20 assert found[0].x == case[1].x
python
Split
Function to split a string into a list of strings using a separator.
Parameters
The separator string to use when splitting the string.
Returns
Returns an expression object denoting the result of the split
function.
The resulting expression is of type List[str]
or Optional[List[str]]
depending on
input being nullable.
Errors
The str
namespace must be invoked on an expression that evaluates to string
or optional of string.
1from fennel.expr import col
2
3expr = col("s").str.split(",")
4
5assert expr.typeof(schema={"s": str}) == List[str]
6assert expr.typeof(schema={"s": Optional[str]}) == Optional[List[str]]
7
8# can be evaluated with a dataframe
9df = pd.DataFrame({"s": ["a,b,c", "d,e", "f", None]})
10schema = {"s": Optional[str]}
11assert expr.eval(df, schema).tolist() == [
12 ["a", "b", "c"],
13 ["d", "e"],
14 ["f"],
15 pd.NA,
16]
python
Starts With
Function to check if the given string starts with another string.
Parameters
startswith
checks if the input string starts with the expression item
.
Returns
Returns an expression object denoting the result of the startswith
expression.
The resulting expression is of type bool
or Optional[bool]
depending on
either of input/item being nullable.
Errors
The str
namespace must be invoked on an expression that evaluates to string
or optional of string. Similarly, item
must evaluate to either a string or an
optional of string.
1from fennel.expr import col
2
3expr = col("x").str.startswith(col("y"))
4
5assert expr.typeof(schema={"x": str, "y": str}) == bool
6assert expr.typeof(schema={"x": str, "y": Optional[str]}) == Optional[bool]
7assert expr.typeof(schema={"x": Optional[str], "y": str}) == Optional[bool]
8assert (
9 expr.typeof(schema={"x": Optional[str], "y": Optional[str]})
10 == Optional[bool]
11)
12
13# can be evaluated with a dataframe
14df = pd.DataFrame(
15 {
16 "x": ["hello", "world", "some", None],
17 "y": ["he", "rld", None, None],
18 }
19)
20schema = {"x": Optional[str], "y": Optional[str]}
21assert expr.eval(df, schema=schema).tolist() == [True, False, pd.NA, pd.NA]
22
23# schema of both columns must be str
24with pytest.raises(ValueError):
25 expr.typeof(schema={"x": str})
26
27with pytest.raises(Exception):
28 expr.typeof(schema={"x": str, "y": int})
python
Strptime
Function to parse a datetime of the given format out of the string.
Parameters
A valid datetime format string. See here for a full list of all format qualifiers supported by Fennel.
Default: UTC
Sometimes format strings don't precisely specify the timezone. In such cases, a timezone can be provided. In absence of an explicit timezone, all ambiguous strings are assumed to be in UTC.
Note that timezone
is merely a hint to resolve ambiguity - the timezone
info from the format string is preferentially used when available.
Returns
Returns an expression object denoting the result of the strptime
expression.
The resulting expression is of type datetime
or Optional[datetime]
depending on
input being nullable.
Errors
The str
namespace must be invoked on an expression that evaluates to string
or optional of string.
A compile-time error is raised if either the format string or the timezone is invalid.
1from fennel.expr import col
2from datetime import datetime
3
4expr = col("x").str.strptime("%Y-%m-%d")
5
6assert expr.typeof(schema={"x": str}) == datetime
7assert expr.typeof(schema={"x": Optional[str]}) == Optional[datetime]
8
9df = pd.DataFrame({"x": ["2021-01-01", "2021-02-01", None]})
10schema = {"x": Optional[str]}
11assert expr.eval(df, schema).tolist() == [
12 pd.Timestamp(2021, 1, 1, tz="UTC"),
13 pd.Timestamp(2021, 2, 1, tz="UTC"),
14 pd.NaT,
15]
16
17# can also provide a timezone
18expr = col("x").str.strptime("%Y-%m-%d", timezone="Asia/Tokyo")
19
20assert expr.eval(df, schema).tolist() == [
21 pd.Timestamp(2021, 1, 1, tz="Asia/Tokyo"),
22 pd.Timestamp(2021, 2, 1, tz="Asia/Tokyo"),
23 pd.NaT,
24]
25
26# error on invalid format - %L is not a valid format
27expr = col("x").str.strptime("%Y-%m-%d %L)")
28with pytest.raises(Exception):
29 expr.eval(df, schema)
30
31# error on invalid timezone
32expr = col("x").str.strptime("%Y-%m-%d", timezone="invalid")
33with pytest.raises(Exception):
34 expr.eval(df, schema)
python
Upper
Function to convert a string to all upper case letters.
Returns
Returns an expression object denoting the result of the upper
function.
The resulting expression is of type str
or Optional[str]
depending on
input being nullable.
Errors
The str
namespace must be invoked on an expression that evaluates to string
or optional of string.
1from fennel.expr import col
2
3expr = col("x").str.upper()
4
5assert expr.typeof(schema={"x": str}) == str
6assert expr.typeof(schema={"x": Optional[str]}) == Optional[str]
7
8# can be evaluated with a dataframe
9df = pd.DataFrame({"x": ["HeLLo", "World", "some", None]})
10schema = {"x": Optional[str]}
11assert expr.eval(df, schema=schema).tolist() == [
12 "HELLO",
13 "WORLD",
14 "SOME",
15 pd.NA,
16]
17
18# schema of column must be str
19with pytest.raises(ValueError):
20 expr.typeof(schema={"x": int})
python