-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathclassify.py
More file actions
130 lines (104 loc) · 5.63 KB
/
classify.py
File metadata and controls
130 lines (104 loc) · 5.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import json
import os
import pickle
from collections import Counter

import tweepy
class TwitterAPI(object):
    """Thin wrapper around a tweepy client plus helpers for word statistics.

    Besides forwarding a few calls to ``self.api`` (a ``tweepy.API`` object),
    the class reads pickled status lists from ``alltweets_<userID>.p`` files
    in the current working directory and computes simple word-frequency
    rankings over tweet texts.
    """

    # Lower-cased tokens dropped by deleteSW(). Kept as a frozenset so each
    # membership test is O(1) and the collection is built only once.
    # NOTE: "por que" contains a space and therefore can never equal a single
    # whitespace-split token; it is kept only to mirror the original list.
    _STOPWORDS = frozenset((
        "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
        ":", ";", ".", "!", "#", "@",
        "un", "de", "no", "se", "me", "a", "al", "que", "han", "y",
        "una", "unas", "unos", "uno", "sobre", "todo", "tambien", "tras",
        "otro", "algun", "alguno", "alguna", "algunos", "algunas",
        "ser", "es", "soy", "eres", "somos", "sois",
        "estoy", "esta", "estamos", "estais", "estan",
        "como", "en", "para", "atras", "porque", "por que",
        "estado", "estaba", "ante", "antes", "siendo", "ambos", "pero", "por",
        "poder", "puede", "puedo", "podemos", "podeis", "pueden",
        "fui", "fue", "fuimos", "fueron",
        "hacer", "hago", "hace", "hacemos", "haceis", "hacen",
        "cada", "fin", "incluso", "primero", "desde",
        "conseguir", "consigo", "consigue", "consigues", "conseguimos",
        "consiguen",
        "ir", "voy", "va", "vamos", "vais", "van", "vaya", "gueno", "ha",
        "tener", "tengo", "tiene", "tenemos", "teneis", "tienen",
        "el", "la", "lo", "las", "los", "su", "aqui", "mio", "tuyo",
        "ellos", "ellas", "nos", "nosotros", "vosotros", "vosotras", "si",
        "dentro", "solo", "solamente",
        "saber", "sabes", "sabe", "sabemos", "sabeis", "saben",
        "ultimo", "largo", "bastante", "haces", "muchos",
        "aquellos", "aquellas", "sus", "entonces", "tiempo",
        "verdad", "verdadero", "verdadera",
        "cierto", "ciertos", "cierta", "ciertas",
        "intentar", "intento", "intenta", "intentas", "intentamos",
        "intentais", "intentan",
        "dos", "bajo", "arriba", "encima",
        "usar", "uso", "usas", "usa", "usamos", "usais", "usan",
        "emplear", "empleo", "empleas", "emplean", "ampleamos", "empleais",
        "valor", "muy", "era", "eras", "eramos", "eran", "modo", "bien",
        "cual", "cuando", "donde", "mientras", "quien", "con", "entre", "sin",
        "trabajo", "trabajar", "trabajas", "trabaja", "trabajamos",
        "trabajais", "trabajan",
        "podria", "podrias", "podriamos", "podrian", "podriais",
        "yo", "aquel",
    ))

    def __init__(self, consumer_key=None, consumer_secret=None,
                 access_token=None, access_token_secret=None):
        """Create the authenticated tweepy client and store it in ``self.api``.

        Credentials are no longer hard-coded in the source. Each value is
        taken from the matching keyword argument or, when that is ``None``,
        from the environment variable ``TWITTER_CONSUMER_KEY``,
        ``TWITTER_CONSUMER_SECRET``, ``TWITTER_ACCESS_TOKEN`` or
        ``TWITTER_ACCESS_TOKEN_SECRET``.

        NOTE(security): the keys that used to be embedded in this file were
        published with it and should be treated as compromised and revoked.

        Raises:
            ValueError: if any of the four credentials is missing or empty.
        """
        supplied = (
            ("TWITTER_CONSUMER_KEY", consumer_key),
            ("TWITTER_CONSUMER_SECRET", consumer_secret),
            ("TWITTER_ACCESS_TOKEN", access_token),
            ("TWITTER_ACCESS_TOKEN_SECRET", access_token_secret),
        )
        resolved = [
            value if value is not None else os.environ.get(env_name)
            for env_name, value in supplied
        ]
        # Validate before touching tweepy so a misconfiguration fails with a
        # clear message instead of an authentication error later on.
        missing = [
            env_name
            for (env_name, _), value in zip(supplied, resolved)
            if not value
        ]
        if missing:
            raise ValueError(
                "Missing Twitter credentials: " + ", ".join(missing)
            )
        consumer_key, consumer_secret, access_token, access_token_secret = resolved

        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        self.api = tweepy.API(auth)

    def tweet(self, message):
        """Post ``message`` by calling ``self.api.update_status``."""
        self.api.update_status(status=message)

    def deleteSW(self, tweets):
        """Split tweets into words and drop the stop words.

        Args:
            tweets: iterable of tweet texts (strings).

        Returns:
            A flat list of the whitespace-separated tokens of all tweets, in
            order, excluding every token whose lower-cased form is in
            ``_STOPWORDS``. Kept tokens retain their original casing.
        """
        return [
            word
            for tweet in tweets
            for word in tweet.split()
            if word.lower() not in self._STOPWORDS
        ]

    def commonWords(self, words, limit=10):
        """Return the most frequent words, most frequent first.

        The previous implementation keyed a dict by occurrence count (so
        words sharing a count overwrote each other), sorted the surviving
        words alphabetically instead of by frequency, and sliced ``[0:9]``,
        returning at most nine items.

        Args:
            words: list of words (case-sensitive).
            limit: maximum number of words to return; defaults to 10.

        Returns:
            A list of at most ``limit`` distinct words ordered by descending
            number of occurrences.
        """
        return [word for word, _ in Counter(words).most_common(limit)]

    def description(self, id, depure=None):
        """Return a user's profile description with unwanted words removed.

        The original body referenced an undefined name ``depure`` (raising
        ``NameError`` on every call) and accumulated characters into a list.
        The words to remove are now an optional argument and the result is a
        string.

        Args:
            id: user identifier passed unchanged to ``self.api.get_user``.
            depure: optional iterable of words to drop (exact, case-sensitive
                match); nothing is dropped when omitted.

        Returns:
            The description's space-separated words that are not in
            ``depure``, joined by single spaces.
        """
        excluded = frozenset(depure or ())
        user = self.api.get_user(id)
        kept = [
            word for word in user.description.split(' ')
            if word not in excluded
        ]
        return ' '.join(kept)

    def nFollowers(self, ID):
        """Return ``followers_count`` of the user fetched via ``self.api.get_user(ID)``."""
        user = self.api.get_user(ID)
        return user.followers_count

    def getTopFolllowed(self, users):
        """Return the users whose follower count is at or above the average.

        Args:
            users: iterable of user identifiers accepted by ``nFollowers``.

        Returns:
            A dict mapping ``str(user)`` to the follower count as a string
            (the string form is what the original returned), restricted to
            users whose count is >= the arithmetic mean over all ``users``.
            Returns an empty dict for empty input instead of raising
            ``ZeroDivisionError``.
        """
        followers = {}
        total = 0
        seen = 0
        for user in users:
            count_text = str(self.nFollowers(user))
            followers[str(user)] = count_text
            total += int(count_text)
            seen += 1
        if not seen:
            return {}
        # float() gives a true mean on Python 2 as well, where "/" on two
        # ints would floor and let below-average users through.
        average = float(total) / seen
        return {
            name: count_text
            for name, count_text in followers.items()
            if int(count_text) >= average
        }

    def _loadStatuses(self, userID):
        """Load the pickled statuses stored in ``alltweets_<userID>.p``."""
        file_name = "alltweets_%s.p" % userID
        # NOTE(security): unpickling can execute arbitrary code; only load
        # files this project produced itself.
        with open(file_name, "rb") as handle:
            return pickle.load(handle)

    def getRetweetCount(self, userID):
        """Sum ``retweet_count`` over the stored statuses of ``userID``.

        Statuses whose ``id`` attribute is ``None`` are skipped.

        Returns:
            The total as a number (0 when there are no statuses).
        """
        return sum(
            status.retweet_count
            for status in self._loadStatuses(userID)
            if status.id is not None
        )

    def getTweets(self, userID):
        """Return the ``text`` of every stored status of ``userID``.

        ``None`` entries in the pickled list are skipped.
        """
        return [
            status.text
            for status in self._loadStatuses(userID)
            if status is not None
        ]

    def getFavCount(self, userID):
        """Return ``favourites_count`` of the user fetched via ``self.api.get_user(userID)``."""
        user = self.api.get_user(userID)
        return user.favourites_count

    # TODO: specialize(self, userID) was only a commented-out stub in the
    # original file and is not implemented.
if __name__ == "__main__":
    # Script entry point: for every user with an above-average follower
    # count, print and collect their most common non-stop-words, then write
    # the collected data to topTenWords.json.
    twitter = TwitterAPI()

    # NOTE(security): unpickling can execute arbitrary code; only load files
    # this project produced itself.
    with open("usersWithTweetsFlor.p", "rb") as users_file:
        users = pickle.load(users_file)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("The top-ten word from your users :")
    topFollowed = twitter.getTopFolllowed(users)

    data = {}
    for user in topFollowed:
        print(user)
        topTenWords = twitter.commonWords(
            twitter.deleteSW(twitter.getTweets(user))
        )
        print(" " + str(topTenWords))
        print(" " + str(topFollowed[user]))
        data[user] = {"ranking": topFollowed[user], "topTen": topTenWords}

    # Write the report once, after all users have been processed.
    with open('topTenWords.json', 'w') as f:
        json.dump(data, f)