Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit b6f586d

Browse files
author
boididou
committedNov 18, 2014
Agreement based retraining technique added
A method that retrains the model using the testing samples on which the Item and User classifier predictions agreed.
1 parent d2c2945 commit b6f586d

15 files changed

+77
-263
lines changed
 

‎resources/files/happy-emoticons.txt

Whitespace-only changes.

‎resources/files/sad-emoticons.txt

Whitespace-only changes.

‎resources/files/third-order-prons.txt

Whitespace-only changes.

‎src/gr/iti/mklab/extractfeatures/ItemFeaturesExtractor.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,7 @@ public static HashSet<String> checkForExternalLinks(MediaItem item) throws Malfo
554554
}
555555

556556
if (substring.equals(longUrl)){
557-
System.out.println("Substring equals long url!");
557+
//System.out.println("Substring equals long url!");
558558
//longUrl = expandUrl(longUrl);
559559
}
560560

‎src/gr/iti/mklab/extractfeatures/UserFeaturesExtractor.java

+11-118
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import eu.socialsensor.framework.common.domain.StreamUser;
2828
import eu.socialsensor.geo.Countrycoder;
2929
import gr.iti.mklab.utils.TextProcessing;
30+
import gr.iti.mklab.verifyutils.WebOfTrustManager;
3031

3132

3233
/**
@@ -119,17 +120,17 @@ public static String getUserUrl(Document doc) {
119120
String val = null;
120121

121122
newsHeadlines = doc.select(".url .profile-field a");
122-
System.out.println("news "+newsHeadlines);
123+
123124
if (newsHeadlines != null && !newsHeadlines.equals("")
124125
&& !newsHeadlines.isEmpty()) {
125126
val = newsHeadlines.attr("href");
126-
//System.out.println("if "+val);
127+
127128
} else {
128129
newsHeadlines = doc.select(".ProfileHeaderCard-urlText a");
129130
val = newsHeadlines.attr("href");
130-
//System.out.println("else "+val+"-");
131+
131132
}
132-
//System.out.println("val "+val);
133+
133134
return val;
134135
}
135136

@@ -412,94 +413,7 @@ public static String expandUrl(String shortenedUrl) {
412413
return expandedURL;
413414
}
414415

415-
/**
416-
* Calculates the WOT values(trust and safe) of a link. Returns 0 for
417-
* unavailable values.
418-
*
419-
* @param host
420-
* the link to calculate for
421-
* @return Integer[] WOT values
422-
* @throws MalformedURLException
423-
* @throws IOException
424-
* @throws JSONException
425-
*/
426-
public static Integer[] getWotValues(String host) throws JSONException, MalformedURLException, IOException{
427-
428-
Integer[] values = new Integer[2];
429-
430-
// String host = item.getMediaLinks().get(0).getMediaLink();
431-
System.out.println("the host " +host);
432-
433-
String host0 = expandUrl(host);
434-
if (host0 == null){
435-
host0 = host;
436-
}
437-
System.out.println("the host transformed "+host0);
438-
439-
440-
441-
InputStream response;
442-
String res = null;
443-
444-
try {
445-
response = new URL(
446-
"http://api.mywot.com/0.4/public_link_json2?hosts="
447-
+ host0
448-
+ "/&key=75ff0cddd33a6e731c2d862c570de6c19f78423f")
449-
.openStream();
450-
res = getStringFromInputStream(response);
451-
452-
} catch (Exception e1) {
453-
454-
455-
host0 = host0.split("/")[0] + "/" + host0.split("/")[1]
456-
+ "/" + host0.split("/")[2];
457-
458-
response = new URL(
459-
"http://api.mywot.com/0.4/public_link_json2?hosts="
460-
+ host0
461-
+ "/&key=75ff0cddd33a6e731c2d862c570de6c19f78423f")
462-
.openStream();
463-
464-
res = getStringFromInputStream(response);
465-
466-
467-
}
468-
//System.out.println("RESPONSE: "+res);
469-
JSONObject jO = new JSONObject(res);
470-
if (jO.length()>0){
471-
System.out.println(jO);
472-
String name = jO.names().get(0).toString();
473-
474-
System.out.println("the name " + name);
475-
476-
try {
477-
JSONArray trust = jO.getJSONObject(name).getJSONArray("0");
478-
Integer valueTrust = Integer.parseInt(trust.get(0).toString());
479-
Integer confTrust = Integer.parseInt(trust.get(1).toString());
480-
values[0] = valueTrust * confTrust / 100;
481416

482-
JSONArray safe = jO.getJSONObject(name).getJSONArray("4");
483-
Integer valueSafe = Integer.parseInt(safe.get(0).toString());
484-
Integer confSafe = Integer.parseInt(safe.get(1).toString());
485-
values[1] = valueSafe * confSafe / 100;
486-
487-
//System.out.println(valueTrust+" "+confTrust+" "+values[0]);
488-
//System.out.println(valueSafe+" "+confSafe+" "+values[1]);
489-
} catch (Exception e) {
490-
values[0] = 0;
491-
values[1] = 0;
492-
System.out.println("Not available WOT values for this link!");
493-
}
494-
495-
}
496-
//System.out.println(values[0]+" "+values[1]);
497-
if (values[0]==null || values[1]==null){
498-
values[0] = 0;
499-
values[1] = 0;
500-
}
501-
return values;
502-
}
503417

504418
public static Boolean hasProfileImg(Document doc) {
505419

@@ -535,25 +449,22 @@ public static Boolean hasHeaderImg(Document doc) {
535449
}
536450
}
537451

538-
452+
static Countrycoder countrycodingService;
539453
public static void initializeFiles() {
540454

541455
rootGeonamesDir = "C:/Users/boididou/workspace/twitter-image-verification/resources/files/";
542456
citiesFile = rootGeonamesDir + "cities1000_mod.txt";
543457
countryInfoFile = rootGeonamesDir + "countryInfo.txt";
544458
adminNamesFile = rootGeonamesDir + "admin1CodesASCII_mod.txt";
545-
459+
countrycodingService = new Countrycoder(citiesFile, countryInfoFile, adminNamesFile);
546460
}
547461

548462
public static boolean hasExistingLocation(String locationName) {
549463

550-
Countrycoder countrycodingService = new Countrycoder(citiesFile, countryInfoFile, adminNamesFile);
464+
551465
String[] locParts = null;
552466
boolean hasExistingLocation = false;
553467

554-
//System.out.println("Location name "+locationName);
555-
//System.out.println(StringUtils.isAlphanumeric(locationName)+" "+!locationName.contains("."));
556-
557468

558469
locParts = locationName.split(",");
559470

@@ -592,7 +503,6 @@ public static UserFeatures extractUserFeaturesMedia(String username,String id) t
592503

593504
UserFeatures uf = null;
594505

595-
596506
/**Get information for features by scraping their twitter profile webpage**/
597507

598508
Document doc = null;
@@ -616,8 +526,7 @@ public static UserFeatures extractUserFeaturesMedia(String username,String id) t
616526
Float FolFrieRatio = getFollowerFriendRatio(numFriends,numFollowers);
617527
Long timesListed = getTimesListed(doc);
618528
Boolean hasURL = hasUrl(doc);
619-
//System.out.println("has url "+hasURL);
620-
529+
621530
Boolean hasBio = hasBio(doc);
622531
Boolean isVerified = isVerifiedUser(doc);
623532
Long numTweets = getNumTweets(doc);
@@ -643,14 +552,12 @@ public static UserFeatures extractUserFeaturesMedia(String username,String id) t
643552
else {
644553
hasExistingLocation = hasExistingLocation(location);
645554
}
646-
//System.out.println("existing location "+hasExistingLocation);
647-
648555

649556
Integer wotTrustUser = null;
650557
Integer wotSafeUser = null;
651558
Integer[] values = {0,0};
652559

653-
if (hasURL) values = getWotValues(getUserUrl(doc));
560+
if (hasURL) values = WebOfTrustManager.getWotValues(getUserUrl(doc));
654561

655562
if (values[0] != 0 && values[1] != 0) {
656563
wotTrustUser = values[0];
@@ -671,13 +578,6 @@ public static UserFeatures extractUserFeaturesMedia(String username,String id) t
671578
.hasProfileImg(hasProfileImg)
672579
.hasHeaderImg(hasHeaderImg).wotSafeUser(wotSafeUser).tweetRatio(tweetRatio).build();
673580

674-
/*MongoHandler mh = null;
675-
try {
676-
mh = new MongoHandler(Vars.LOCALHOST_IP, Vars.DB_NAME_USER_EXTRACTION);
677-
} catch (UnknownHostException e) {
678-
e.printStackTrace();
679-
}
680-
mh.insert(uf,Vars.COLL_NAME_USER_EXTRACTION );*/
681581
}
682582
}catch(Exception e) {
683583
System.out.println("User not found for this item.");
@@ -834,8 +734,6 @@ public static UserFeatures userFeatureExtractionMedia(MediaItem item) throws Exc
834734

835735
UserFeatures userFeat = null;
836736

837-
//String id = item.getUserId();//.replace("Twitter#", "");
838-
//StreamUser su = getStreamUser(id);
839737
String username = item.getPageUrl().replaceAll("http://", "").split("//")[1];
840738
userFeat = extractUserFeaturesMedia(username, item.getId());
841739

@@ -857,12 +755,7 @@ public static List<UserFeatures> userFeatureExtractionMedia(List<MediaItem> list
857755
for (int i=0;i<listMediaItems.size();i++){
858756

859757
UserFeatures userFeatures = null;
860-
//String id = listMediaItems.get(i).getUserId().replace("Twitter#", "");
861-
862-
863-
//StreamUser su = getStreamUser(id);
864-
865-
758+
866759
String username = listMediaItems.get(i).getPageUrl().replaceAll("http://", "").split("/")[1];
867760

868761
userFeatures = extractUserFeaturesMedia(username,listMediaItems.get(i).getId());

‎src/gr/iti/mklab/utils/FileManager.java

+1-25
Original file line numberDiff line numberDiff line change
@@ -251,30 +251,6 @@ public void writeSelectedToFile(String filePath, String filePath2, String fileOu
251251
}
252252

253253

254-
public void manageRequestedItems(List<ItemFeatures> itemFeats, String db, String collection, String collection2) throws Exception {
255-
256-
List<String> ids = new ArrayList<String>();
257-
258-
for (ItemFeatures feat:itemFeats) {
259-
ids.add(feat.getId());
260-
}
261-
262-
MongoHandler mh = null;
263-
try {
264-
mh = new MongoHandler(Vars.LOCALHOST_IP, "Experiments");
265-
} catch (UnknownHostException e) {
266-
e.printStackTrace();
267-
}
268-
269-
ItemDAOImpl dao = new ItemDAOImpl(Vars.LOCALHOST_IP, db, collection);
270-
271-
for (String id:ids) {
272-
//id = id.replaceAll("Twitter::", "");
273-
Item item = dao.getItem(id);
274-
mh.insert(item, collection2);
275-
}
276-
277-
278-
}
254+
279255

280256
}

‎src/gr/iti/mklab/utils/TextProcessing.java

+1-3
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,6 @@ public String eraseCharacters(String str){
130130
*/
131131
public String eraseAllCharacters(String str){
132132

133-
System.out.println("before " + str);
134-
135133
str = str.replaceAll("\\.", " ");
136134
str = str.replaceAll(",", " "); // Clear commas
137135
str = str.replaceAll("$", " "); // Clear $'s (optional)
@@ -160,7 +158,7 @@ public String eraseAllCharacters(String str){
160158
str = str.replaceAll("&lt;", " ");
161159

162160
str = str.trim();
163-
System.out.println("after " + str);
161+
164162
return str;
165163
}
166164

‎src/gr/iti/mklab/utils/Vars.java

+32-88
Original file line numberDiff line numberDiff line change
@@ -5,108 +5,52 @@
55
import java.util.Set;
66

77
public class Vars {
8-
//general variables
9-
public static final String LOCALHOST_IP = "160.40.50.242";
10-
11-
//path of images' urls extracted
12-
public static final String URL_IMAGES_PATH = "../TweetFeatureExtraction/resources/url_files/BringBack/fake_prot.txt";
13-
14-
//Class ItemFeaturesExtractor paths
15-
public static final String HAPPY_EMO_PATH = "C:/Users/boididou/workspace/TweetFeatureExtraction/resources/files/happy-emoticons.txt";
16-
public static final String SAD_EMO_PATH="/Users/boididou/workspace/TweetFeatureExtraction/resources/files/sad-emoticons.txt";
17-
18-
public static final String FIRST_PRON_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/first-order-prons.txt";
19-
public static final String SECOND_PRON_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/second-order-prons.txt";
20-
public static final String THIRD_PRON_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/third-order-prons.txt";
21-
22-
public static final String FIRST_PRON_ES_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/first-order-prons-spanish.txt";
23-
public static final String SECOND_PRON_ES_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/second-order-prons-spanish.txt";
24-
public static final String THIRD_PRON_ES_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/third-order-prons-spanish.txt";
25-
26-
public static final String FIRST_PRON_DE_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/first-order-prons-german.txt";
27-
public static final String SECOND_PRON_DE_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/second-order-prons-german.txt";
28-
public static final String THIRD_PRON_DE_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/third-order-prons-german.txt";
29-
30-
public static final String SLANG_ENG_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/slangwords-english.txt";
31-
public static final String SLANG_ES_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/slangwords-spanish.txt";
32-
33-
public static final String POS_WORDS_ENG_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/positive-words.txt";
34-
public static final String POS_WORDS_ES_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/positive-words-spanish.txt";
35-
public static final String POS_WORDS_DE_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/positive-words-german.txt";
36-
public static final String NEG_WORDS_ENG_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/negative-words.txt";
37-
public static final String NEG_WORDS_ES_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/negative-words-spanish.txt";
38-
public static final String NEG_WORDS_DE_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/files/negative-words-german.txt";
39-
40-
//Where to save
41-
public static final String DB_NAME_ITEM_EXTRACTION = "FeaturesObjects2";
42-
public static final String COLL_NAME_ITEM_EXTRACTION = "ItemFeatReals_Bostonunique2";
43-
44-
public static final String DB_NAME_USER_EXTRACTION = "JetBlue";
45-
public static final String COLL_NAME_USER_EXTRACTION = "UserFeatFakes2";
46-
47-
488

499

10+
//Class ItemFeaturesExtractor paths
11+
public static final String HAPPY_EMO_PATH = "resources/files/happy-emoticons.txt";
12+
public static final String SAD_EMO_PATH="resources/files/sad-emoticons.txt";
13+
14+
public static final String FIRST_PRON_PATH = "resources/files/first-order-prons.txt";
15+
public static final String SECOND_PRON_PATH = "resources/files/second-order-prons.txt";
16+
public static final String THIRD_PRON_PATH = "resources/files/third-order-prons.txt";
17+
18+
public static final String FIRST_PRON_ES_PATH = "resources/files/first-order-prons-spanish.txt";
19+
public static final String SECOND_PRON_ES_PATH = "resources/files/second-order-prons-spanish.txt";
20+
public static final String THIRD_PRON_ES_PATH = "resources/files/third-order-prons-spanish.txt";
21+
22+
public static final String FIRST_PRON_DE_PATH = "resources/files/first-order-prons-german.txt";
23+
public static final String SECOND_PRON_DE_PATH = "resources/files/second-order-prons-german.txt";
24+
public static final String THIRD_PRON_DE_PATH = "resources/files/third-order-prons-german.txt";
25+
26+
public static final String SLANG_ENG_PATH = "resources/files/slangwords-english.txt";
27+
public static final String SLANG_ES_PATH = "resources/files/slangwords-spanish.txt";
28+
29+
public static final String POS_WORDS_ENG_PATH = "resources/files/positive-words.txt";
30+
public static final String POS_WORDS_ES_PATH = "resources/files/positive-words-spanish.txt";
31+
public static final String POS_WORDS_DE_PATH = "resources/files/positive-words-german.txt";
32+
public static final String NEG_WORDS_ENG_PATH = "resources/files/negative-words.txt";
33+
public static final String NEG_WORDS_ES_PATH = "resources/files/negative-words-spanish.txt";
34+
public static final String NEG_WORDS_DE_PATH = "resources/files/negative-words-german.txt";
35+
5036

5137
//Classification
52-
//model files that we used for our training and testing experiments
38+
//Previous model files that we used for our training and testing experiments with initial item and user features included
5339
public static final String MODEL_PATH_ITEM = "resources/model/j48updated.model";
54-
public static final String MODEL_PATH_TOTAL= "resources/model/j48total.model";
5540
public static final String MODEL_PATH_USER= "resources/model/j48user.model";
41+
public static final String MODEL_PATH_TOTAL= "resources/model/j48total.model";
5642

57-
//model files for additional experiment
43+
//Updated files that we used for our experiments with new item and user features included
5844
public static final String MODEL_PATH_ITEM_sample = "resources/model/j48-item.model";
5945
public static final String MODEL_PATH_USER_sample = "resources/model/j48-user.model";
6046
public static final String MODEL_PATH_TOTAL_sample = "";
47+
6148
//supported langs
6249
public static final HashSet<String> SUPPORTED_LANGS = new HashSet<String>(Arrays.asList("en","es","nolang"));
63-
64-
//MODELS
65-
//Classification models
66-
public static final String MODEL_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/resources/models/sandy/pure_dataset/random-25-original.model";
67-
68-
public static final String MODEL_PATH2 = "/Users/boididou/workspace/TweetFeatureExtraction/resources/models/sandy/pure_dataset/j48-item.model";
69-
public static final String MODEL_PATH3 = "/Users/boididou/workspace/TweetFeatureExtraction/resources/models/sandy/pure_dataset/j48-user.model";
70-
public static final String MODEL_PATH4 = "/Users/boididou/workspace/TweetFeatureExtraction/resources/models/sandy/pure_dataset/bayes.model";
71-
72-
public static final String MODEL_PATH_total = "/Users/boididou/workspace/TweetFeatureExtraction/resources/models/sandy/pure_dataset/J48/Atts no.34 - greed/j48-test.model";
73-
public static final String MODEL_PATH_total2 = "/Users/boididou/workspace/TweetFeatureExtraction/resources/models/sandy/pure_dataset/RandomForest/Atts no.35 - greed/random-35-greedy-9.model";
74-
public static final String MODEL_PATH_total3 = "/Users/boididou/workspace/TweetFeatureExtraction/resources/models/sandy/pure_dataset/kstar-35-greedy-.model";
75-
50+
7651
//lang models
7752
public static final String MODEL_PARSER = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
78-
79-
//values
80-
//fake
81-
//public static final String[] SET_VALUES_TESTING_FAKE = new String[] { "0","1","2","3","4","5","6","12","13","15","18","20","22","23","24","25","26","30","31","32","34" };
82-
//public static final String[] SET_VALUES_TRAINING_FAKE = new String[] {"7","8","9","10","11","14","16","17","19","21","27","28","29","33","35","36","37","38","39","40","41","42","43","44","45","46","47"};
83-
public static final String[] SET_VALUES_TESTING_FAKE = new String[] { "27","28","0","1","4","5","6","12","7","8","9","10","11","14","16","17","19","21","40","41","45","46" };
84-
public static final String[] SET_VALUES_TRAINING_FAKE = new String[] {"29","33","35","36","37","38","39","40","41","42","43","44","47","20","13","15","18","22","23","24","25","2","3","26","30","31","32","34"};
85-
public static final Set<String> TRAINING_SET_FAKE = new HashSet<String>(Arrays.asList(SET_VALUES_TRAINING_FAKE));
86-
public static final Set<String> TESTING_SET_FAKE = new HashSet<String>(Arrays.asList(SET_VALUES_TESTING_FAKE));
87-
//real
88-
//public static final String[] SET_VALUES_TRAINING_REAL = new String[] {"0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30","31","32","33","34","35","36","37","38","39","40","41","42","43","44","45","46","47","48","49","50"};
89-
//public static final String[] SET_VALUES_TESTING_REAL = new String[] {"51","52","53","54","55","56","57","58","59","60","61","62","63","64","65","66","67","68","69","70","71","72","73","74","75","76","77","78"};
90-
public static final String[] SET_VALUES_TRAINING_REAL = new String[] {"21","22","26","27","28","29","30","31","32","33","34","35","36","37","38","39","40","41","42","43","44","45","46","47","48","49","50","60","61","62","67","68","69","70","71","72","73","74","75","76","77","78"};
91-
public static final String[] SET_VALUES_TESTING_REAL = new String[] {"51","52","53","54","55","56","57","58","59","63","64","65","66","0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","23","24","25"};
92-
public static final Set<String> TRAINING_SET_REAL = new HashSet<String>(Arrays.asList(SET_VALUES_TRAINING_REAL));
93-
public static final Set<String> TESTING_SET_REAL = new HashSet<String>(Arrays.asList(SET_VALUES_TESTING_REAL));
94-
95-
//fake
96-
public static final String[] SET_VALUES_TESTING_FAKEb = new String[] { "0","1","2","3","4","5","6","12","13","15","18","20","22","23","24","25","26","30","31","32","34" };
97-
public static final String[] SET_VALUES_TRAINING_FAKEb = new String[] {"7","8","9","10","11","14","16","17","19","21","27","28","29","33","35","36","37","38","39","40","41","42","43","44","45","46","47"};
98-
public static final Set<String> TRAINING_SET_FAKEb = new HashSet<String>(Arrays.asList(SET_VALUES_TRAINING_FAKEb));
99-
public static final Set<String> TESTING_SET_FAKEb = new HashSet<String>(Arrays.asList(SET_VALUES_TESTING_FAKEb));
100-
//real
101-
public static final String[] SET_VALUES_TRAINING_REALb = new String[] {"0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30","31","32","33","34","35","36","37","38","39","40","41","42","43","44","45","46","47","48","49","50"};
102-
public static final String[] SET_VALUES_TESTING_REALb = new String[] {"51","52","53","54","55","56","57","58","59","60","61","62","63","64","65","66","67","68","69","70","71","72","73","74","75","76","77","78"};
103-
public static final Set<String> TRAINING_SET_REALb = new HashSet<String>(Arrays.asList(SET_VALUES_TRAINING_REALb));
104-
public static final Set<String> TESTING_SET_REALb = new HashSet<String>(Arrays.asList(SET_VALUES_TESTING_REALb));
105-
//hashset
106-
public static final String URL_HASHMAP_FAKE = "hashmap.txt";
107-
public static final String URL_HASHMAP_REAL = "hashmapreal.txt";
108-
public static final String FAKE_WORDS_PATH = "/Users/boididou/workspace/TweetFeatureExtraction/frequencymanager/fakes/mapfake.txt";
109-
53+
11054
//hashset stanford parser labels
11155
public static final String[] set_labels = {"NN","NNS","NNP","NNPS"};
11256
public static final Set<String> LABELS = new HashSet<String>(Arrays.asList(set_labels));

‎src/gr/iti/mklab/verify/AgreementBasedRetraining.java

+15-10
Original file line numberDiff line numberDiff line change
@@ -592,7 +592,7 @@ public void classifyDisagreed(List<ElementAnnotation> listEla) throws Exception
592592

593593

594594
//compare the scores of the cross validation in order to set the pointer value(0 for the Item classifer and 1 for the User classifier)
595-
if (itemScore>userScore) {
595+
if (itemScore > userScore) {
596596
training = new Instances("Rel1", ItemClassifier.getFvAttributes(), ids_agreed.size());
597597
testing = new Instances("Rel2", ItemClassifier.getFvAttributes(), ids_disagreed.size());
598598
training.setClassIndex(ItemClassifier.getFvAttributes().size() - 1);
@@ -775,14 +775,12 @@ public static void verifyItems() throws Exception {
775775

776776
//define the fake and real list of MediaItems for Item and User classifier case used for training
777777
List<MediaItem> trainFake = new ArrayList<MediaItem>();
778-
MediaItemDAOImpl dao = new MediaItemDAOImpl(Vars.LOCALHOST_IP, "Malaysia", "FakeItems_unique");
778+
MediaItemDAOImpl dao = new MediaItemDAOImpl("160.40.50.242", "Malaysia", "FakeItems_unique");
779779
trainFake = dao.getLastMediaItems(20);
780-
System.out.println("trainFake "+trainFake.size());
781780

782781
List<MediaItem> trainReal = new ArrayList<MediaItem>();
783-
MediaItemDAOImpl dao2 = new MediaItemDAOImpl(Vars.LOCALHOST_IP, "FerrySinks", "RealItems_unique");
782+
MediaItemDAOImpl dao2 = new MediaItemDAOImpl("160.40.50.242", "FerrySinks", "RealItems_unique");
784783
trainReal = dao2.getLastMediaItems(20);
785-
System.out.println("trainReal "+trainReal.size());
786784

787785
List<List<MediaItem>> list = new ArrayList<List<MediaItem>>();
788786
list.add(trainFake);
@@ -794,12 +792,12 @@ public static void verifyItems() throws Exception {
794792

795793
//define the fake and real list of MediaItems for Item and User classifier case used for testing
796794
List<MediaItem> testFake = new ArrayList<MediaItem>();
797-
MediaItemDAOImpl dao3 = new MediaItemDAOImpl(Vars.LOCALHOST_IP, "Sochi", "ItemsFake_unique");
798-
testFake = dao3.getLastMediaItems(5);
795+
MediaItemDAOImpl dao3 = new MediaItemDAOImpl("160.40.50.242", "Sochi", "ItemsFake_unique");
796+
testFake = dao3.getLastMediaItems(40);
799797

800798
List<MediaItem> testReal = new ArrayList<MediaItem>();
801-
MediaItemDAOImpl dao4 = new MediaItemDAOImpl(Vars.LOCALHOST_IP, "Sochi", "ItemsReal_unique");
802-
testReal = dao4.getLastMediaItems(25);
799+
MediaItemDAOImpl dao4 = new MediaItemDAOImpl("160.40.50.242", "Sochi", "ItemsReal_unique");
800+
testReal = dao4.getLastMediaItems(40);
803801

804802
List<List<MediaItem>> list2 = new ArrayList<List<MediaItem>>();
805803
list2.add(testFake);
@@ -808,13 +806,17 @@ public static void verifyItems() throws Exception {
808806
//call method to create the testing sets with the lists given above
809807
testDatasets = dvb.getTestDatasets(list2);
810808

811-
//repeat the process several times in order to differentiate the training set
809+
//repeat the process several times in order to differentiate the training set (the "for" loop is optional; you can just run the process once)
812810
for (int i=0;i<randomVals.size();i++) {
813811

814812
//values initialization before each trial execution
815813
initializeParameters();
816814

817815
//call method to find the common sets among the testing sets
816+
/*Even if we define one set of testing items, feature extraction may not be performed for some of them, e.g. a user's account may be suspended,
817+
so there will be no user features for this one, but only item features. So, we aim to find those items that co-exist in the two sets and have both
818+
item and user features.
819+
*/
818820
sets = dvb.findCommonSets(testDatasets);
819821

820822
//define the current value of random values
@@ -829,10 +831,13 @@ public static void verifyItems() throws Exception {
829831
e.printStackTrace();
830832
}
831833

834+
//trainingSize is the number of the training items Bagging will use
832835
int trainingSize = 5;
836+
833837
//define the set of classifiers for each case
834838
Classifier[] itemCls;
835839
Classifier[] userCls;
840+
836841
try {
837842

838843
//call method to create the bagging classifiers with the trainDatasets and sets for item and user case

‎src/gr/iti/mklab/verify/Bagging.java

+1-3
Original file line numberDiff line numberDiff line change
@@ -510,9 +510,7 @@ public static Classifier[] createClassifiersUser(Instances training2, Instances
510510

511511
currentTrain = DataHandler.getInstance().getTransformedTrainingUser(currentTrain);
512512

513-
/*if (testingSetsUser[j]!=null) {
514-
System.out.println(testingSetsUser[j].size());
515-
}*/
513+
516514
testingSetsUser[j] = DataHandler.getInstance().getTransformedTestingUser(testing);
517515

518516
FilteredClassifier fc = new FilteredClassifier();

‎src/gr/iti/mklab/verify/ItemClassifier.java

+5-6
Original file line numberDiff line numberDiff line change
@@ -618,7 +618,7 @@ public static double[] findProbDistribution(Instances isTestSet) throws Exceptio
618618
//probabilities variable
619619
double[] probabilities = new double[isTestSet.size()];
620620
SerializedClassifier classifier = new SerializedClassifier();
621-
classifier.setModelFile(new File(Vars.MODEL_PATH_ITEM));
621+
classifier.setModelFile(new File(Vars.MODEL_PATH_ITEM_sample));
622622

623623
for (int i = 0; i < isTestSet.numInstances(); i++) {
624624
double[] probabilityDistribution = classifier.distributionForInstance(isTestSet.instance(i));
@@ -629,11 +629,10 @@ public static double[] findProbDistribution(Instances isTestSet) throws Exceptio
629629

630630
public static Instances formTrainingSet(List<MediaItem> itemsFake, List<MediaItem> itemsReal) throws Exception {
631631

632-
632+
System.out.println("Training set: Item features extraction for fake items...");
633633
List<ItemFeatures> itemFeatsFake = ItemFeaturesExtractor.featureExtractionMedia(itemsFake);
634-
System.out.println("itemFeatsFake "+itemFeatsFake.size());
634+
System.out.println("Training set: Item features extraction for real items...");
635635
List<ItemFeatures> itemFeatsReal = ItemFeaturesExtractor.featureExtractionMedia(itemsReal);
636-
System.out.println("itemFeatsReal "+itemFeatsReal.size());
637636

638637
Instances isTrainingSet = null;
639638
// define the list of itemFeatures that are used for training
@@ -672,9 +671,9 @@ public static Instances formTrainingSet(List<MediaItem> itemsFake, List<MediaIte
672671

673672
public static Instances formTestingSet(List<MediaItem> itemsFake, List<MediaItem> itemsReal) throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException {
674673

675-
System.out.println("Item features extraction for fake items");
674+
System.out.println("Testing set: Item features extraction for fake items...");
676675
List<ItemFeatures> itemFeatsFake = ItemFeaturesExtractor.featureExtractionMedia(itemsFake);
677-
System.out.println("Item features extraction for real items");
676+
System.out.println("Testing set: Item features extraction for real items...");
678677
List<ItemFeatures> itemFeatsReal = ItemFeaturesExtractor.featureExtractionMedia(itemsReal);
679678

680679
List<ItemFeatures> itemFeaturesTesting = new ArrayList<ItemFeatures>();

‎src/gr/iti/mklab/verify/TotalClassifier.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
import gr.iti.mklab.extractfeatures.UserFeaturesExtractor;
2626
import gr.iti.mklab.utils.Vars;
2727

28+
29+
2830
/**
2931
* Class to organize the Total classification using Item and User features
3032
* by declaring the attributes,creating the testing set
@@ -505,7 +507,7 @@ public static double[] findProbDistribution(Instances isTestSet) throws Exceptio
505507
//probabilities variable
506508
double[] probabilities = new double[isTestSet.size()];
507509
SerializedClassifier classifier = new SerializedClassifier();
508-
classifier.setModelFile(new File(Vars.MODEL_PATH_TOTAL));
510+
//classifier.setModelFile(new File(Vars.MODEL_PATH_TOTAL));
509511

510512
for (int i = 0; i < isTestSet.numInstances(); i++) {
511513
double[] probabilityDistribution = classifier.distributionForInstance(isTestSet.instance(i));

‎src/gr/iti/mklab/verify/UserClassifier.java

+5-6
Original file line numberDiff line numberDiff line change
@@ -633,12 +633,11 @@ public static double[] findProbDistribution(Instances isTestSet)
633633

634634
public static Instances formTrainingSet(List<MediaItem> itemsFake, List<MediaItem> itemsReal) throws Exception {
635635

636-
636+
System.out.println("Training set: User features extraction for fake items...");
637637
List<UserFeatures> userFeatsFake = UserFeaturesExtractor.userFeatureExtractionMedia(itemsFake);
638-
System.out.println("userFeatsFake user "+ userFeatsFake.size());
638+
System.out.println("Training set: User features extraction for real items...");
639639
List<UserFeatures> userFeatsReal = UserFeaturesExtractor.userFeatureExtractionMedia(itemsReal);
640-
System.out.println("userFeatsReal user "+ userFeatsReal.size());
641-
640+
642641
// define the list of User Features that are used for training
643642
List<UserFeatures> userFeaturesTraining = new ArrayList<UserFeatures>();
644643

@@ -684,9 +683,9 @@ public static Instances formTrainingSet(List<MediaItem> itemsFake, List<MediaIte
684683

685684
public static Instances formTestingSet(List<MediaItem> itemsFake, List<MediaItem> itemsReal) throws Exception{
686685

687-
System.out.println("User features extraction for fake items");
686+
System.out.println("Testing set: User features extraction for fake items");
688687
List<UserFeatures> itemFeatsFake = UserFeaturesExtractor.userFeatureExtractionMedia(itemsFake);
689-
System.out.println("User features extraction for real items");
688+
System.out.println("Testing set: User features extraction for real items");
690689
List<UserFeatures> itemFeatsReal = UserFeaturesExtractor.userFeatureExtractionMedia(itemsReal);
691690

692691
// define the list of User Features that are used for training

‎src/gr/iti/mklab/verifyutils/DataHandler.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ public Normalize createNormalizationFilter(Instances isTrainingSet) {
8686
norm.setInputFormat(isTrainingSet);
8787

8888
// set and print the normalization options
89-
System.out.println();
89+
9090
String[] options = { "-S", "2.0", "-T", "-1.0" };
9191
norm.setOptions(options);
9292
//System.out.print("Normalization options:\t");

‎src/gr/iti/mklab/verifyutils/WebOfTrustManager.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ public static String expandUrl(String shortenedUrl) {
6464
* @throws IOException
6565
* @throws JSONException
6666
*/
67-
public Integer[] getWotValues(String host) throws MalformedURLException,
67+
public static Integer[] getWotValues(String host) throws MalformedURLException,
6868
IOException, JSONException {
6969

7070
Integer[] values = { 0, 0 };

0 commit comments

Comments
 (0)
Please sign in to comment.