-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAccenture_Social_Buzz_Analysis_SAS_Code
115 lines (94 loc) · 2.19 KB
/
Accenture_Social_Buzz_Analysis_SAS_Code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/* ---- CONTENT: load the workbook into WORK.CONTENT1 ---- */
/* VALIDVARNAME=V7 is set before the import so the imported column names
   follow V7 naming rules. */
options validvarname=v7;

proc import
    out=WORK.CONTENT1
    datafile="/home/sigrid0/Content1.xlsx"
    dbms=XLSX
    replace;   /* overwrite WORK.CONTENT1 if it already exists */
run;

/* List the imported variables with their types and lengths. */
proc contents data=content1;
run;
/* Intermediate CONTENT table:
   - Category_ holds the category text with every double-quote character removed
   - Type is written out under the name Content_Type
   - url and the original category column are not written to the output */
data content_;
    set content1;

    Category_ = compress(category, '"');

    rename Type = Content_Type;
    drop url category;
run;
/* Final CONTENT table used by the downstream join.
   Reads CONTENT_, derives a lower-cased Category from Category_, repairs the
   truncated label 'public speakin', and leaves Category_ out of the output. */
data content;
    set content_;

    /* Normalise case so differently-cased spellings collapse to one value. */
    Category = lowcase(Category_);

    /* Repair the truncated label. The corrected text must be written to
       Category (the variable that is kept); writing it to Category_ has no
       effect on the output because Category_ is dropped below.
       NOTE(review): assumes Category is at least 15 characters wide so
       'public speaking' is not truncated - confirm with PROC CONTENTS. */
    if Category = 'public speakin' then Category = 'public speaking';

    drop Category_;
run;
/* One-way frequency tables for every variable positioned from content_id
   through category in CONTENT. Levels are listed by descending count, and
   missing values are included as a level (MISSING option). */
proc freq data=content order=freq;
    tables content_id -- category / missing;
run;
/* ---- REACTIONS: load the workbook into WORK.REACTIONS1 ---- */
/* VALIDVARNAME=V7 is set before the import so the imported column names
   follow V7 naming rules. */
options validvarname=v7;

proc import
    out=WORK.REACTIONS1
    datafile="/home/sigrid0/Reactions1.xlsx"
    dbms=XLSX
    replace;   /* overwrite WORK.REACTIONS1 if it already exists */
run;

/* List the imported variables with their types and lengths. */
proc contents data=reactions1;
run;
/* Intermediate REACTIONS table: blank user_id and type values are replaced
   with the literal 'missing' so they show up as an explicit level.
   NOTE(review): the literal is stored in the existing variables, so it is
   truncated if either column is narrower than 7 characters - confirm. */
data reactions_;
    set reactions1;

    if missing(user_id) then user_id = 'missing';
    if missing(type)    then type    = 'missing';
run;
/* One-way frequency tables for every variable positioned from content_id
   through type in REACTIONS_. */
proc freq data=reactions_;
    tables content_id -- type;
run;
/* Final REACTIONS table: the datetime value is split into a date part and a
   time-of-day part, and the original datetime column is not written out. */
data reactions (drop=datetime);
    set reactions_;

    Date = datepart(datetime);   /* date portion of the datetime value */
    time = timepart(datetime);   /* time portion of the datetime value */

    format date date7.
           time time8.;
run;
/* Missing-value checks on REACTIONS. */

/* Character variables: frequency tables with missing values counted as a level. */
proc freq data=reactions;
    tables _char_ / missing;
run;

/* Numeric variables: number of missing values per variable. */
proc means data=reactions nmiss;
    var _numeric_;
run;
/* ---- REACTION_TYPES: load the workbook into WORK.REACTION_Types1 ---- */
/* VALIDVARNAME=V7 is set before the import so the imported column names
   follow V7 naming rules. */
options validvarname=v7;

proc import
    out=WORK.REACTION_Types1
    datafile="/home/sigrid0/Reaction_Types1.xlsx"
    dbms=XLSX
    replace;   /* overwrite WORK.REACTION_Types1 if it already exists */
run;

/* List the imported variables with their types and lengths. */
proc contents data=reaction_types1;
run;
/* REACTION_TYPES: an unmodified copy of the imported table, stored under the
   name that the join step reads. */
data reaction_types;
    set reaction_types1;
run;
/* Build SOCIAL_B: one row per REACTIONS row, enriched with
   - content_type and category from CONTENT (matched on content_id)
   - sentiment and score from REACTION_TYPES (matched on type)
   Both joins are LEFT joins, so every REACTIONS row is kept; rows with no
   match get missing values in the joined columns. */
proc sql;
    create table social_B as
    select
        rx.content_id,
        rx.user_id,
        rx.type as reaction_type,
        rx.date,
        ct.content_type,
        ct.category,
        rt.sentiment,
        rt.score
    from reactions as rx
        left join content as ct
            on rx.content_id = ct.content_id
        left join reaction_types as rt
            on rx.type = rt.type;
quit;