Skip to content

[FSTORE-1008][APPEND] create feature groups with struct feature #307

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions integrations/java/java/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@

## Introduction
In this tutorial you will learn how to fetch feature vectors from the online feature store for near real-time model serving
using external java application.
using external java application.

## Clone tutorials repository
This section requires Maven, Java 1.8, and Git.

```bash
git clone https://github.com/logicalclocks/hopsworks-tutorials
cd ./hopsworks-tutorials/java
cd ./hopsworks-tutorials/integrations/java/java
mvn clean package
```

Expand All @@ -21,16 +21,17 @@ Then define environment variables

```bash
HOPSWORKS_HOST=REPLACE_WITH_YOUR_HOPSWORKS_CLUSTER_HOST
HOPSWORKS_PORT=REPLACE_WITH_YOUR_HOPSWORKS_CLUSTER_PORT
HOPSWORKS_API_KEY=REPLACE_WITH_YOUR_HOPSWORKS_API_KEY
HOPSWORKS_PROJECT_NAME=REPLACE_WITH_YOUR_HOPSWORKS_PROJECT_NAME

FEATURE_GROUP_NAME=java_data
FEATURE_GROUP_VERSION=1
FEATURE_VIEW_NAME=products_fv
FEATURE_VIEW_VERSION=1
```
FEATURE_VIEW_VERSION=1
```

```bash
python3 ./setup_fv_fg.py --host $HOPSWORKS_HOST --api_key $HOPSWORKS_API_KEY --project $HOPSWORKS_PROJECT_NAME --feature_group_name $FEATURE_GROUP_NAME --feature_group_version $FEATURE_GROUP_VERSION --feature_view_name $FEATURE_VIEW_NAME --feature_view_version $FEATURE_VIEW_VERSION
java -jar ./target/hopsworks-java-tutorial-3.9.0-RC9-jar-with-dependencies.jar $HOPSWORKS_HOST $HOPSWORKS_API_KEY $HOPSWORKS_PROJECT_NAME $FEATURE_GROUP_NAME $FEATURE_GROUP_VERSION $FEATURE_VIEW_NAME $FEATURE_VIEW_VERSION
python3 ./setup_fv_fg.py --host $HOPSWORKS_HOST --port $HOPSWORKS_PORT --api_key $HOPSWORKS_API_KEY --project $HOPSWORKS_PROJECT_NAME --feature_group_name $FEATURE_GROUP_NAME --feature_group_version $FEATURE_GROUP_VERSION --feature_view_name $FEATURE_VIEW_NAME --feature_view_version $FEATURE_VIEW_VERSION
java -jar ./target/hopsworks-java-tutorial-4.3.0-SNAPSHOT-jar-with-dependencies.jar $HOPSWORKS_HOST $HOPSWORKS_PORT \
  $HOPSWORKS_API_KEY $HOPSWORKS_PROJECT_NAME $FEATURE_GROUP_NAME $FEATURE_GROUP_VERSION $FEATURE_VIEW_NAME $FEATURE_VIEW_VERSION
```
2 changes: 1 addition & 1 deletion integrations/java/java/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>com.hopsworks.tutorials</groupId>
<artifactId>hopsworks-java-tutorial</artifactId>
<version>3.9.0-RC9</version>
<version>4.3.0-SNAPSHOT</version>

<properties>
<maven.compiler.source>1.8</maven.compiler.source>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package com.hopsworks.tutorials;


import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.UUID;

/**
 * Produces random sample rows for the struct-feature tutorial, either as
 * {@link JavaStructPojo} instances or as Avro {@link GenericRecord}s.
 *
 * <p>Every row carries a primary key derived from the caller-supplied id, a
 * random microsecond timestamp, and a list of 1 to 5 random {@code S_feat}
 * entries.
 */
public class JavaStructGenerator {

    private static final Random random = new Random();

    /** Jan 1, 2000 expressed in epoch milliseconds; lower bound for random timestamps. */
    private static final long START_MILLIS = 946684800000L;

    /** Upper bound (inclusive) on the number of S_feat entries generated per row. */
    private static final int MAX_FEAT_ITEMS = 5;

    /**
     * Generates a random microsecond timestamp.
     * This value is computed between Jan 1, 2000 and the current time.
     *
     * @return a timestamp in microseconds since the epoch
     */
    private static Long getRandomMicroTimestamp() {
        long nowMillis = System.currentTimeMillis();
        long randomMillis = START_MILLIS + (long) (random.nextDouble() * (nowMillis - START_MILLIS));
        return randomMillis * 1000; // convert milliseconds to microseconds
    }

    /**
     * Builds a random SKU string of the form {@code SKU-xxxxxxxx}, where the
     * suffix is the first 8 characters of a random UUID.
     */
    private static String randomSku() {
        return "SKU-" + UUID.randomUUID().toString().substring(0, 8);
    }

    /**
     * Draws how many S_feat entries a row should contain (between 1 and 5).
     * Drawn once per row so the size is uniformly distributed.
     */
    private static int randomFeatSize() {
        return random.nextInt(MAX_FEAT_ITEMS) + 1;
    }

    /**
     * Generates a random instance of JavaStructPojo.
     *
     * @param id value used (via {@code toString()}) as the primary key; must not be null
     * @return a POJO with the given key, a random event time and 1 to 5 random S_feat items
     */
    public static JavaStructPojo generateJavaStructPojo(Integer id) {
        // The primary key is not random: it is the string form of the supplied id.
        String pk = id.toString();
        // Random event_time (timestamp in microseconds)
        Long eventTime = getRandomMicroTimestamp();

        int featSize = randomFeatSize();
        List<JavaStructPojo.S_feat> featList = new ArrayList<>(featSize);
        for (int i = 0; i < featSize; i++) {
            featList.add(generateRandomS_feat());
        }

        return new JavaStructPojo(pk, eventTime, featList);
    }

    /**
     * Generates a random GenericRecord for the given schema.
     *
     * <p>The schema is expected to expose the fields {@code pk}, {@code event_time}
     * and {@code feat}. Each is read as a union whose branch at index 1 is the
     * non-null type (i.e. {@code [null, T]}); a schema with a different layout
     * will fail here.
     *
     * @param schema Avro record schema describing the row
     * @param id value used (via {@code toString()}) as the primary key; must not be null
     * @return a populated record with 1 to 5 nested S_feat records in {@code feat}
     */
    public static GenericRecord generateRandomRecord(Schema schema, Integer id) {
        GenericRecord record = new GenericData.Record(schema);

        // "pk" is a union [null, string]; copy the value against the non-null branch
        Schema pkSchema = schema.getField("pk").schema();
        record.put("pk", GenericData.get().deepCopy(pkSchema.getTypes().get(1), id.toString()));

        // "event_time" is a union [null, timestamp-micros]
        Schema eventTimeSchema = schema.getField("event_time").schema();
        record.put("event_time", GenericData.get().deepCopy(eventTimeSchema.getTypes().get(1), getRandomMicroTimestamp()));

        // feat: [null, array<union[null, S_feat]>]
        Schema featUnionSchema = schema.getField("feat").schema();
        Schema arraySchema = featUnionSchema.getTypes().get(1); // array<union[null, S_feat]>
        Schema elementUnionSchema = arraySchema.getElementType(); // union[null, S_feat]
        Schema sFeatSchema = elementUnionSchema.getTypes().get(1); // S_feat

        // These field schemas do not change between elements, so resolve them once.
        Schema skuSchema = sFeatSchema.getField("sku").schema().getTypes().get(1);
        Schema tsSchema = sFeatSchema.getField("ts").schema().getTypes().get(1);

        // Draw the size ONCE. Re-drawing the bound in the loop condition on every
        // iteration would bias the list towards short lengths and would disagree
        // with generateJavaStructPojo.
        int featSize = randomFeatSize();
        List<Object> featList = new ArrayList<>(featSize);
        for (int i = 0; i < featSize; i++) {
            GenericRecord sFeat = new GenericData.Record(sFeatSchema);
            sFeat.put("sku", GenericData.get().deepCopy(skuSchema, randomSku()));
            sFeat.put("ts", GenericData.get().deepCopy(tsSchema, getRandomMicroTimestamp()));

            // no createUnion, just add the record
            Object unionWrappedSFeat = GenericData.get().deepCopy(elementUnionSchema, sFeat);
            featList.add(unionWrappedSFeat);
        }

        // no wrapping for union of feat, just set the list
        record.put("feat", featList);

        return record;
    }

    /**
     * Generates a random S_feat instance.
     *
     * @return an S_feat with a random SKU and a random microsecond timestamp
     */
    public static JavaStructPojo.S_feat generateRandomS_feat() {
        String sku = randomSku();
        Long ts = getRandomMicroTimestamp();
        return new JavaStructPojo.S_feat(sku, ts);
    }

    /**
     * Generates {@code size} random POJO rows whose primary keys are
     * {@code "0"} through {@code "size - 1"}.
     *
     * @param size number of rows to generate
     * @return a mutable list of generated rows, in key order
     */
    public static List<JavaStructPojo> generateData(int size) {
        List<JavaStructPojo> rows = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
            rows.add(generateJavaStructPojo(i));
        }
        return rows;
    }

    /**
     * Generates {@code size} random Avro records for the given schema, with
     * primary keys {@code "0"} through {@code "size - 1"}.
     *
     * @param schema Avro record schema passed to {@link #generateRandomRecord(Schema, Integer)}
     * @param size number of records to generate
     * @return a mutable list of generated records, in key order
     * @throws IOException declared for caller compatibility; nothing in this method throws it
     */
    public static List<GenericRecord> generateGenericRecordData(Schema schema, int size) throws IOException {
        List<GenericRecord> rows = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
            rows.add(generateRandomRecord(schema, i));
        }
        return rows;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package com.hopsworks.tutorials;

import org.apache.avro.reflect.Nullable;
import org.apache.avro.reflect.AvroSchema;
import java.util.List;

/**
 * Plain data holder for one tutorial row: a primary key, an event time and a
 * list of nested {@link S_feat} entries. The Avro reflect annotations on the
 * fields drive the schema derived from this class, so field order and
 * annotations must be kept as they are.
 */
public class JavaStructPojo {

    /** Primary key; nullable, i.e. a union of null and string. */
    @Nullable
    private String pk;

    /** Event time; nullable long carrying the logical type "timestamp-micros". */
    @Nullable
    @AvroSchema("{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"}")
    private Long event_time;

    /**
     * Nested struct entries; the field itself is nullable.
     * NOTE(review): the earlier comment described the elements as unions of
     * null and S_feat as well; confirm against the schema Avro actually derives.
     */
    @Nullable
    private List<S_feat> feat;

    /** No-argument constructor; leaves every field null. */
    public JavaStructPojo() {}

    /**
     * Creates a fully populated row.
     *
     * @param pk primary key
     * @param event_time event time in microseconds
     * @param feat nested struct entries (stored by reference, not copied)
     */
    public JavaStructPojo(String pk, Long event_time, List<S_feat> feat) {
        this.pk = pk;
        this.event_time = event_time;
        this.feat = feat;
    }

    /** @return the primary key, possibly null */
    public String getPk() {
        return this.pk;
    }

    /** @param pk the primary key to store */
    public void setPk(String pk) {
        this.pk = pk;
    }

    /** @return the event time, possibly null */
    public Long getEvent_time() {
        return this.event_time;
    }

    /** @param event_time the event time to store */
    public void setEvent_time(Long event_time) {
        this.event_time = event_time;
    }

    /** @return the stored list of nested entries (the same instance, not a copy), possibly null */
    public List<S_feat> getFeat() {
        return this.feat;
    }

    /** @param feat the list of nested entries to store (kept by reference) */
    public void setFeat(List<S_feat> feat) {
        this.feat = feat;
    }

    /** Renders the row as {@code JavaStructPojo{pk='..', event_time=.., feat=..}}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("JavaStructPojo{");
        text.append("pk='").append(pk).append('\'');
        text.append(", event_time=").append(event_time);
        text.append(", feat=").append(feat);
        text.append('}');
        return text.toString();
    }

    /** Nested static class corresponding to the S_feat record. */
    public static class S_feat {

        /** SKU identifier; nullable, i.e. a union of null and string. */
        @Nullable
        private String sku;

        /** Timestamp; nullable long carrying the logical type "timestamp-micros". */
        @Nullable
        @AvroSchema("{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"}")
        private Long ts;

        /** No-argument constructor; leaves every field null. */
        public S_feat() {}

        /**
         * Creates a fully populated entry.
         *
         * @param sku SKU identifier
         * @param ts timestamp in microseconds
         */
        public S_feat(String sku, Long ts) {
            this.sku = sku;
            this.ts = ts;
        }

        /** @return the SKU, possibly null */
        public String getSku() {
            return this.sku;
        }

        /** @param sku the SKU to store */
        public void setSku(String sku) {
            this.sku = sku;
        }

        /** @return the timestamp, possibly null */
        public Long getTs() {
            return this.ts;
        }

        /** @param ts the timestamp to store */
        public void setTs(Long ts) {
            this.ts = ts;
        }

        /** Renders the entry as {@code S_feat{sku='..', ts=..}}. */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("S_feat{");
            text.append("sku='").append(sku).append('\'');
            text.append(", ts=").append(ts);
            text.append('}');
            return text.toString();
        }
    }
}
Loading