Skip to content

Commit 1f57963

Browse files
author
csanders-git
committed
Adding Social Data and pptx
1 parent 6f001fe commit 1f57963

22 files changed

+4165
-3858
lines changed

Shmoocon_ Playing the short game.pptx

7.17 MB
Binary file not shown.

Stock_Data.ipynb

+526-347
Large diffs are not rendered by default.

dataset-samples.csv

+154-154
Large diffs are not rendered by default.

dataset.csv

+3,356-3,357
Large diffs are not rendered by default.

images/NL-resuls-BLK.png

-18 Bytes
Loading

images/NL-resuls-DGX.png

1.28 KB
Loading

images/NL-resuls-FAF.png

48 Bytes
Loading

images/NL-resuls-OPK.png

-12 Bytes
Loading

images/Regression-resuls-FAF.png

2.01 KB
Loading
-117 KB
Loading

images/breaches-by-any-trending.png

265 KB
Loading

images/breaches-by-large-trending.png

290 KB
Loading
-290 KB
Binary file not shown.
Loading
Binary file not shown.
Loading

images/breaches-under-hack.png

-313 KB
Binary file not shown.
-480 KB
Loading

images/public-breaches-by-style.png

30 Bytes
Loading
Loading
-446 KB
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
# pip install pytrends
2+
3+
from pytrends.request import TrendReq
4+
import matplotlib.pyplot as plt
5+
import pandas as pd
6+
import numpy as np
7+
import datetime
8+
9+
def read_nasdaq_and_nyse():
    """Load the NASDAQ and NYSE composite daily-price CSV files.

    Returns:
        A ``(nasdaq_df, nyse_composite_df)`` tuple of DataFrames read from
        ``data/^IXIC-daydata.csv`` and ``data/^NYA-daydata.csv`` (paths are
        relative to the current working directory).
    """
    nasdaq_df, nyse_composite_df = (
        pd.read_csv(csv_path)
        for csv_path in ("data/^IXIC-daydata.csv", "data/^NYA-daydata.csv")
    )
    return nasdaq_df, nyse_composite_df
15+
16+
def get_csv_file(fname):
    """Open a cp1252-encoded CSV file and return its header row and reader.

    Args:
        fname: Path of the CSV file to open.

    Returns:
        A ``(csv_headers, csv_reader)`` tuple. ``csv_headers`` is the first
        row as a list of strings and ``csv_reader`` is a ``csv.reader``
        positioned on the second row. The underlying file is deliberately
        left open on success because the reader pulls rows from it lazily.

    Raises:
        IOError: If the header row cannot be read (empty file, undecodable
            bytes, or malformed CSV). Errors raised by ``open`` itself
            propagate unchanged.
    """
    # Imported locally so the function does not depend on a module-level
    # ``import csv`` being present.
    import csv

    # newline='' is what the csv module expects so that line endings inside
    # quoted fields are handled by the parser rather than the file object.
    csv_file = open(fname, 'r', encoding='cp1252', newline='')
    try:
        csv_reader = csv.reader(csv_file, delimiter=',')
        csv_headers = next(csv_reader)
    except (StopIteration, csv.Error, UnicodeError) as err:
        # Do not leak the handle when the file turns out to be unusable.
        csv_file.close()
        raise IOError("Couldn't read CSV file") from err
    return csv_headers, csv_reader
27+
28+
def fix_weekend_date(breachday):
    """Roll a weekend date forward to the following Monday.

    Args:
        breachday: A ``datetime`` (any object exposing ``weekday()`` and
            supporting ``+ datetime.timedelta``).

    Returns:
        ``breachday`` itself for Monday through Friday; otherwise the
        Monday after it. The result is a date object, not a string.
    """
    # weekday() numbers Monday as 0, so Saturday is 5 and Sunday is 6.
    days_until_monday = {5: 2, 6: 1}.get(breachday.weekday())
    if days_until_monday is None:
        return breachday
    return breachday + datetime.timedelta(days=days_until_monday)
37+
38+
def fix_closed_market_data(adjusted_breach_date, nasdaq_df):
    """Advance a date to the first day on or after it that has NASDAQ data.

    The market may have been closed on the requested day (holiday etc.), so
    the date is moved forward one calendar day at a time until it matches a
    row of ``nasdaq_df``.

    Args:
        adjusted_breach_date: ``datetime`` to start searching from.
        nasdaq_df: DataFrame with a ``date`` column. Dates are matched as
            ``%Y-%m-%d`` strings, so the column is assumed to hold text in
            that form (TODO confirm against how the CSV is loaded).

    Returns:
        ``adjusted_breach_date`` unchanged when that day is present in
        ``nasdaq_df``, otherwise the next later ``datetime`` that is.

    Raises:
        ValueError: If ``nasdaq_df`` has no row on or after the requested
            date. Without this check the day-by-day search never ends.
    """
    date_format = "%Y-%m-%d"
    # Build the lookup once instead of rescanning the frame for every day.
    trading_days = {day for day in nasdaq_df['date'] if isinstance(day, str)}
    # Zero-padded ISO dates sort chronologically as plain strings.
    last_trading_day = max(trading_days, default=None)

    requested = adjusted_breach_date.strftime(date_format)
    while adjusted_breach_date.strftime(date_format) not in trading_days:
        current = adjusted_breach_date.strftime(date_format)
        if last_trading_day is None or current > last_trading_day:
            raise ValueError(
                f"No trading day on or after {requested} in the index data"
            )
        # Keep adding one day until we land on the next trading day.
        adjusted_breach_date = adjusted_breach_date + datetime.timedelta(days=1)
    return adjusted_breach_date
50+
51+
def _row_position(frame, date_str):
    """Return the positional row whose ``date`` equals *date_str*, or None."""
    hits = np.flatnonzero((frame['date'] == date_str).to_numpy())
    return int(hits[0]) if hits.size else None


def _adjusted_percent_change(stock_df, stock_pos, nasdaq_df, nasdaq_pos, offset):
    """Return the stock's percent change minus the NASDAQ's, or NaN if unavailable."""
    if stock_pos is None or nasdaq_pos is None:
        return np.nan
    # The baseline is the close of the row before the breach day; position 0
    # has no such row (and ``iloc[-1]`` would silently read the last row).
    if stock_pos < 1 or nasdaq_pos < 1:
        return np.nan
    # ``iloc`` needs 0 <= position < len; anything else means the data does
    # not reach that far from the breach day.
    if not 0 <= stock_pos + offset < len(stock_df):
        return np.nan
    if not 0 <= nasdaq_pos + offset < len(nasdaq_df):
        return np.nan

    stock_close = stock_df.iloc[stock_pos + offset]['close']
    nasdaq_close = nasdaq_df.iloc[nasdaq_pos + offset]['close']
    stock_baseline = stock_df.iloc[stock_pos - 1]['close']
    nasdaq_baseline = nasdaq_df.iloc[nasdaq_pos - 1]['close']

    stock_change = (stock_close / stock_baseline - 1) * 100
    nasdaq_change = (nasdaq_close / nasdaq_baseline - 1) * 100
    return stock_change - nasdaq_change


def generate_stock_data(df, nyse_df, nasdaq_df, dates, test_data=False):
    """Compute NASDAQ-adjusted percent price changes after each breach.

    For every row of ``df`` the stock's daily data is loaded from
    ``data/<Symbol>-daydata.csv`` (``test-data/`` when ``test_data`` is
    true). The ``Publication`` date (``%m/%d/%y``) is moved off weekends and
    market-closed days, and for each offset in ``dates`` the stock's percent
    change is compared with the NASDAQ's. Both changes are measured from the
    close of the row before the breach day to the close ``offset`` rows
    after it.

    Args:
        df: DataFrame with ``Symbol`` and ``Publication`` columns.
        nyse_df: Unused; kept for interface compatibility.
        nasdaq_df: DataFrame with ``date`` (``%Y-%m-%d`` strings) and
            ``close`` columns, in chronological order.
        dates: Iterable of integer row offsets from the breach day.
        test_data: Read stock CSVs from ``test-data/`` instead of ``data/``.

    Returns:
        DataFrame with one ``stock_<offset>_days`` column per offset and one
        row per row of ``df``. A cell is NaN when the stock has no row for
        the breach day, when there is no earlier row to use as baseline, or
        when either series does not extend ``offset`` rows from the breach.
    """
    stock_info = pd.DataFrame()
    offsets = list(dates)
    if not offsets:
        return stock_info

    data_dir = "test-data/" if test_data else "data/"
    frames_by_path = {}
    prepared = []
    # File loading and date resolution do not depend on the offset, so do
    # them once per row rather than once per (offset, row) pair.
    for _, row in df.iterrows():
        full_path = data_dir + row["Symbol"] + "-daydata.csv"
        if full_path not in frames_by_path:
            frames_by_path[full_path] = pd.read_csv(full_path)
        stock_df = frames_by_path[full_path]

        breachday = datetime.datetime.strptime(row["Publication"], "%m/%d/%y")
        adjusted_breach_date = fix_closed_market_data(
            fix_weekend_date(breachday), nasdaq_df
        )
        breach_date_str = adjusted_breach_date.strftime("%Y-%m-%d")
        prepared.append((
            stock_df,
            _row_position(stock_df, breach_date_str),
            _row_position(nasdaq_df, breach_date_str),
        ))

    for offset in offsets:
        stock_info[f"stock_{offset}_days"] = [
            _adjusted_percent_change(stock_df, stock_pos, nasdaq_df, nasdaq_pos, offset)
            for stock_df, stock_pos, nasdaq_pos in prepared
        ]
    return stock_info
95+
96+
def get_two_week_range(date):
    """Return a date and the date 14 days later as ISO-style strings.

    Args:
        date: Date string in ``%m/%d/%y`` form, e.g. ``"01/02/21"``.

    Returns:
        A ``(start, end)`` tuple of ``%Y-%m-%d`` strings, where ``end`` is
        exactly 14 days after ``start``.
    """
    window_start = datetime.datetime.strptime(date, "%m/%d/%y")
    window_end = window_start + datetime.timedelta(days=14)
    return tuple(
        moment.strftime("%Y-%m-%d") for moment in (window_start, window_end)
    )
102+
103+
def main():
    """Add a Google Trends interest total to every row of the sample dataset.

    For each row of ``../../dataset-samples.csv`` the search term
    ``"<Company Name> Breach"`` is queried through pytrends for the 14 days
    starting at the row's ``Publication`` date. The returned interest values
    are summed, and the totals are stored in a ``Trending_amount`` column
    that is written back to the same file. Rows whose query returns no data
    get a total of 0.
    """
    dataset_path = '../../dataset-samples.csv'
    df = pd.read_csv(dataset_path)
    pytrends = TrendReq(hl='en-US', tz=360)
    trending_data = []
    for _, entry in df.iterrows():
        comp_name = entry["Company Name"].strip('"')
        breachday, breachday_two_weeks = get_two_week_range(entry["Publication"])

        keyword = f"{comp_name} Breach"
        pytrends.build_payload(
            [keyword],
            cat=0,
            timeframe=f'{breachday} {breachday_two_weeks}',
            geo='',
            gprop='',
        )
        trends_data = pytrends.interest_over_time()

        if trends_data.empty:
            trending_data.append(0)
            continue
        trending_total = trends_data[keyword].sum()
        print(f"Found Total for {comp_name} - {trending_total}")
        trending_data.append(trending_total)
    df["Trending_amount"] = trending_data

    # index=False: this file is read back on the next run, and writing the
    # row index would add another "Unnamed: 0"-style column every time.
    df.to_csv(dataset_path, index=False)
127+
128+
129+
# Run only when executed as a script, so importing this module does not
# trigger network requests or rewrite the dataset file.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)