Skip to content

Commit

Permalink
trained embeddings added
Browse files Browse the repository at this point in the history
  • Loading branch information
nishant-sachdeva committed Feb 13, 2025
1 parent ca5f884 commit 05fb1a8
Show file tree
Hide file tree
Showing 151 changed files with 8,055 additions and 7,458 deletions.
1 change: 1 addition & 0 deletions seed_embeddings/OpenKE/config/Trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def __init__(
self.analogies = analogy.AnalogyScorer(analogy_file=analogy_file)

def load_entity_names(self, index_dir):
print("Loading entity names...", index_dir)
with open(os.path.join(index_dir, "entity2id.txt")) as fEntity:
content = fEntity.read()

Expand Down
7 changes: 4 additions & 3 deletions seed_embeddings/OpenKE/generate_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def train(arg_conf):
alpha=0.00729,
opt_method="Adam",
checkpoint_dir=checkpoint_dir,
index_dir=arg_conf.index_dir,
)
trainer.run(
link_prediction=False,
Expand Down Expand Up @@ -137,10 +138,10 @@ def findRep(src, dest, index_dir):
metavar="DIRECTORY",
help="Location of the directory entity2id.txt, train2id.txt and relation2id.txt",
required=False,
default="../seed_embeddings/preprocessed/",
default="../preprocessed/",
)
parser.add_argument(
"--epoch", dest="epoch", help="Epochs", required=False, type=int, default=1000
"--epoch", dest="epoch", help="Epochs", required=False, type=int, default=20
)

parser.add_argument(
Expand Down Expand Up @@ -188,7 +189,7 @@ def findRep(src, dest, index_dir):
),
)

findRep(outfilejson, seedfile, arg_conf.index_dir)
findRep(outfile, seedfile, arg_conf.index_dir)

print("Training finished...")
print("seed file : ", seedfile)
11 changes: 8 additions & 3 deletions seed_embeddings/OpenKE/generate_embedding_ray.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ def reformat_embeddings(input_str):
metric = "loss"
mode = "min"

print(metric, mode)

scheduler = ASHAScheduler(
time_attr="training_iteration",
max_t=arg_conf.epoch,
Expand All @@ -270,11 +272,12 @@ def reformat_embeddings(input_str):
mode=mode,
)
optuna = OptunaSearch(metric="loss", mode="min")
print("GPU usage ", arg_conf.use_gpu)

if arg_conf.use_gpu:
train_with_resources = tune.with_resources(
tune.with_parameters(train, args=arg_conf),
resources={"cpu": 8, "gpu": 0.15},
resources={"cpu": 2, "gpu": 0.11},
)
else:
train_with_resources = tune.with_resources(
Expand All @@ -286,9 +289,9 @@ def reformat_embeddings(input_str):
param_space=search_space,
tune_config=TuneConfig(
search_alg=optuna,
max_concurrent_trials=12,
max_concurrent_trials=4,
scheduler=scheduler,
num_samples=128,
num_samples=16,
),
run_config=RunConfig(
storage_path=arg_conf.storage_path,
Expand Down Expand Up @@ -336,6 +339,7 @@ def reformat_embeddings(input_str):
# Construct the output file name using the best hyperparameters
outfile = os.path.join(
index_dir,
"embeddings/",
"seedEmbedding_{}_{}Dim_{}Alpha_{}batchsize_{}margin.ckpt".format(
metric,
dim,
Expand All @@ -357,6 +361,7 @@ def reformat_embeddings(input_str):

embeddings_path = os.path.join(
index_dir,
"embeddings/",
"seedEmbedding_{}_{}Dim_{}Alpha_{}batchsize_{}margin.ckpt".format(
metric,
dim,
Expand Down
4 changes: 4 additions & 0 deletions seed_embeddings/OpenKE/launch_training.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#! /bin/bash
#
# Launch the Ray-based seed-embedding training (generate_embedding_ray.py)
# with analogy scoring and GPU usage enabled and an embedding dimension of 75.
#
# The data locations are derived from this script's own location instead of a
# user-specific home directory, so the script works from any checkout:
#   index dir    : <seed_embeddings>/preprocessed
#   analogy file : <seed_embeddings>/OpenKE/analogies.txt
#
# Like the original, this must be run from the directory that contains
# generate_embedding_ray.py.

# Absolute path of the directory holding this script (seed_embeddings/OpenKE).
# Absolute paths are kept on purpose, matching the original invocation.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Parent directory (seed_embeddings), which holds the preprocessed index files.
SEED_EMBEDDINGS_DIR="$(dirname "$SCRIPT_DIR")"

python3 generate_embedding_ray.py --is_analogy True --use_gpu True --dim 75 --index_dir "${SEED_EMBEDDINGS_DIR}/preprocessed" \
    --analogy_file "${SCRIPT_DIR}/analogies.txt"
6 changes: 3 additions & 3 deletions seed_embeddings/triplets.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ if [ -z $4 ]; then
exit
fi

LLVM_BUILD=$5
LLVM_BUILD=/usr

if [ -z $LLVM_BUILD ]; then
echo "5st arg should have a valid Build path"
Expand Down Expand Up @@ -70,13 +70,13 @@ while read p; do
fi
USED_OPT[$a]=$opt
DEBUG echo "opt from $opt"
${LLVM_BUILD}/bin/opt-19 -S -$opt $p -o $tmpfile
${LLVM_BUILD}/bin/opt-20 -S -$opt $p -o $tmpfile
$COLLECT_BUILD/bin/ir2vec -collectIR -o $4 $tmpfile &>/dev/null
let "a++"
rm "$tmpfile"
done &
if [ $counter == 100 ]; then
sleep 20
sleep 3
counter=0
fi

Expand Down
4 changes: 2 additions & 2 deletions src/test-suite/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,11 @@ endif()

# sanity checks and lit configs
configure_file(sanity_check.sh.cmake sanity_check.sh @ONLY)
file(COPY PE-benchmarks-llfiles-llvm19 DESTINATION ./)
file(COPY PE-benchmarks-llfiles-llvm20 DESTINATION ./)
file(COPY sqlite3.ll DESTINATION ./)
file(COPY oracle DESTINATION ./)
file(COPY ../../vocabulary DESTINATION ./)
file(COPY index-llvm19.files DESTINATION ./)
file(COPY index-llvm20.files DESTINATION ./)


configure_file(lit.site.cfg.py.in lit.site.cfg.py @ONLY)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; ModuleID = 'PE-benchmarks/Find_the_closest_pair_from_two_sorted_arrays.cpp'
source_filename = "PE-benchmarks/Find_the_closest_pair_from_two_sorted_arrays.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
target triple = "x86_64-pc-linux-gnu"

module asm ".globl _ZSt21ios_base_library_initv"

Expand Down Expand Up @@ -195,6 +195,6 @@ attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: readwrite
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{i32 7, !"frame-pointer", i32 2}
!5 = !{!"clang version 19.1.7 (https://github.com/llvm/llvm-project.git cd708029e0b2869e80abe31ddb175f7c35361f90)"}
!5 = !{!"Ubuntu clang version 20.1.0 (++20250204023448+d185bd94ff77-1~exp1~20250204023606.14)"}
!6 = distinct !{!6, !7}
!7 = !{!"llvm.loop.mustprogress"}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; ModuleID = 'PE-benchmarks/Iterative_QuickSort.cpp'
source_filename = "PE-benchmarks/Iterative_QuickSort.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
target triple = "x86_64-pc-linux-gnu"

module asm ".globl _ZSt21ios_base_library_initv"

Expand Down Expand Up @@ -319,7 +319,7 @@ attributes #5 = { nocallback nofree nounwind willreturn memory(argmem: readwrite
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{i32 7, !"frame-pointer", i32 2}
!5 = !{!"clang version 19.1.7 (https://github.com/llvm/llvm-project.git cd708029e0b2869e80abe31ddb175f7c35361f90)"}
!5 = !{!"Ubuntu clang version 20.1.0 (++20250204023448+d185bd94ff77-1~exp1~20250204023606.14)"}
!6 = distinct !{!6, !7}
!7 = !{!"llvm.loop.mustprogress"}
!8 = distinct !{!8, !7}
Expand Down
Loading

0 comments on commit 05fb1a8

Please sign in to comment.