|
| 1 | +# pip install pytrends |
| 2 | + |
| 3 | +from pytrends.request import TrendReq |
| 4 | +import matplotlib.pyplot as plt |
| 5 | +import pandas as pd |
| 6 | +import numpy as np |
| 7 | +import datetime |
| 8 | + |
def read_nasdaq_and_nyse():
    """Load the NASDAQ and NYSE composite day-data CSV files.

    Both paths are relative to the current working directory.

    Returns:
        A ``(nasdaq_df, nyse_composite_df)`` tuple of DataFrames read from
        ``data/^IXIC-daydata.csv`` and ``data/^NYA-daydata.csv`` respectively.
    """
    index_files = ("data/^IXIC-daydata.csv", "data/^NYA-daydata.csv")
    nasdaq_df, nyse_composite_df = (pd.read_csv(path) for path in index_files)
    return nasdaq_df, nyse_composite_df
| 15 | + |
def get_csv_file(fname):
    """Open a cp1252-encoded CSV file and return its header row and reader.

    Args:
        fname: Path of the CSV file to open.

    Returns:
        A ``(csv_headers, csv_reader)`` tuple: the first row as a list of
        strings, and the ``csv.reader`` positioned on the row after it.
        The reader is lazy, so the underlying file is deliberately left open
        on success for the caller to keep iterating.

    Raises:
        IOError: If the header row cannot be read (for example the file is
            empty or cannot be decoded). Errors from ``open`` itself, such as
            a missing file, propagate unchanged.
    """
    # Imported here because the module's import block does not bring in csv;
    # previously the missing name was swallowed by a bare ``except`` and the
    # function failed on every call.
    import csv

    # newline='' lets the csv module handle line endings inside quoted fields.
    csv_file = open(fname, 'r', encoding='cp1252', newline='')
    try:
        csv_reader = csv.reader(csv_file, delimiter=',')
        csv_headers = next(csv_reader)
    except (StopIteration, csv.Error, UnicodeError, OSError) as err:
        # Nothing will be handed back to the caller, so release the handle.
        csv_file.close()
        raise IOError("Couldn't read CSV file") from err
    return csv_headers, csv_reader
| 27 | + |
def fix_weekend_date(breachday):
    """Move a weekend date forward to the following Monday.

    Args:
        breachday: A date/datetime object exposing ``weekday()``.

    Returns:
        ``breachday`` unchanged for Monday-Friday; otherwise the Monday after
        it (Saturday + 2 days, Sunday + 1 day).
    """
    # weekday(): Monday is 0 ... Saturday is 5, Sunday is 6.
    days_until_monday = {5: 2, 6: 1}.get(breachday.weekday())
    if days_until_monday is None:
        return breachday
    return breachday + datetime.timedelta(days=days_until_monday)
| 37 | + |
def fix_closed_market_data(adjusted_breach_date, nasdaq_df):
    """Advance a date to the first day that has a row in ``nasdaq_df``.

    The market may have been closed on the given date (a holiday, for
    example), so the date is pushed forward one day at a time until it
    matches an entry in the ``date`` column.

    Args:
        adjusted_breach_date: Date/datetime to start searching from.
        nasdaq_df: DataFrame with a ``date`` column that is compared against
            ``"%Y-%m-%d"`` strings.

    Returns:
        ``adjusted_breach_date`` itself when it has a row, otherwise the next
        later date that does.

    Raises:
        ValueError: If ``nasdaq_df`` has no row on or after the given date.
            Previously this case looped forever.
    """
    trading_dates = nasdaq_df['date']
    candidate = adjusted_breach_date
    last_trading_day = None  # computed lazily, only if the first lookup misses

    while not (trading_dates == candidate.strftime("%Y-%m-%d")).any():
        if last_trading_day is None:
            last_trading_day = pd.to_datetime(trading_dates).max()
        # Once the candidate reaches the newest row, stepping forward can
        # never find a match (NaT here means the frame has no dates at all).
        if pd.isna(last_trading_day) or pd.Timestamp(candidate) >= last_trading_day:
            raise ValueError(
                "No market data on or after "
                + adjusted_breach_date.strftime("%Y-%m-%d")
            )
        # Keep adding one day until we reach the next trading day.
        candidate = candidate + datetime.timedelta(days=1)
    return candidate
| 50 | + |
def _breach_row_index(frame, date_str):
    """Return the index of the first row of ``frame`` dated ``date_str``, or None."""
    matches = frame.index[frame['date'] == date_str].tolist()
    return matches[0] if matches else None


def _close_ratio(frame, breach_index, offset):
    """Return close[breach_index + offset] / close[breach_index - 1], or None if a row is missing."""
    if breach_index is None:
        return None
    target = breach_index + offset
    # ``iloc`` wraps negative positions to the end of the frame and raises
    # IndexError at ``len(frame)``, so both ends are checked explicitly.
    if breach_index < 1 or not 0 <= target < len(frame):
        return None
    return frame.iloc[target]['close'] / frame.iloc[breach_index - 1]['close']


def generate_stock_data(df, nyse_df, nasdaq_df, dates, test_data=False):
    """Compute NASDAQ-adjusted percentage price changes around each breach.

    For every row of ``df`` the breach date is taken from ``Publication``
    (``%m/%d/%y``), moved off weekends and market-closed days, and located in
    both the stock's day-data CSV and ``nasdaq_df``. For each offset in
    ``dates`` the change is measured from the close of the row before the
    breach row to the close ``offset`` rows after it, and the NASDAQ change
    over the same rows is subtracted.

    Args:
        df: DataFrame with ``Symbol`` and ``Publication`` columns.
        nyse_df: Unused; kept so existing callers keep working.
        nasdaq_df: DataFrame with ``date`` (``%Y-%m-%d`` strings) and
            ``close`` columns. Row labels are used as positions, so a default
            0..n-1 index is assumed (TODO confirm for caller-supplied frames).
        dates: Iterable of integer row offsets from the breach row.
        test_data: Read stock CSVs from ``test-data/`` instead of ``data/``.

    Returns:
        DataFrame with one ``stock_<offset>_days`` column per offset and one
        row per row of ``df``. A value is NaN when the stock or the NASDAQ
        has no row for the breach date, for the day before it, or for the
        requested offset.
    """
    day_offsets = list(dates)
    stock_info = pd.DataFrame()
    if not day_offsets:
        return stock_info

    data_dir = "test-data/" if test_data else "data/"

    # Locating the breach row does not depend on the offset, so each stock
    # file is read and searched once rather than once per offset.
    breaches = []
    for _, row in df.iterrows():
        stock_df = pd.read_csv(data_dir + row["Symbol"] + "-daydata.csv")
        breachday = datetime.datetime.strptime(row["Publication"], "%m/%d/%y")
        adjusted_breach_date = fix_closed_market_data(
            fix_weekend_date(breachday), nasdaq_df
        )
        adjusted_breach_str = adjusted_breach_date.strftime("%Y-%m-%d")
        breaches.append((
            stock_df,
            _breach_row_index(stock_df, adjusted_breach_str),
            _breach_row_index(nasdaq_df, adjusted_breach_str),
        ))

    for offset in day_offsets:
        adjusted_changes = []
        for stock_df, stock_breach_index, nasdaq_breach_index in breaches:
            stock_ratio = _close_ratio(stock_df, stock_breach_index, offset)
            nasdaq_ratio = _close_ratio(nasdaq_df, nasdaq_breach_index, offset)
            if stock_ratio is None or nasdaq_ratio is None:
                # Missing data on either side: ignore this stock for this offset.
                adjusted_changes.append(np.nan)
                continue
            adjusted_changes.append(
                ((stock_ratio - 1) * 100) - ((nasdaq_ratio - 1) * 100)
            )
        stock_info[f"stock_{offset}_days"] = adjusted_changes
    return stock_info
| 95 | + |
def get_two_week_range(date):
    """Return the ISO-formatted start and end of the two weeks after ``date``.

    Args:
        date: Date string in ``%m/%d/%y`` form (for example ``"01/02/21"``).

    Returns:
        A ``(start, end)`` tuple of ``%Y-%m-%d`` strings, where ``end`` is
        14 days after ``start``.
    """
    iso_format = "%Y-%m-%d"
    range_start = datetime.datetime.strptime(date, "%m/%d/%y")
    range_end = range_start + datetime.timedelta(weeks=2)
    return range_start.strftime(iso_format), range_end.strftime(iso_format)
| 102 | + |
def main():
    """Add a Google Trends interest total to every row of the dataset.

    For each row of ``../../dataset-samples.csv`` the search term
    ``"<Company Name> Breach"`` is queried through pytrends for the two weeks
    starting at the row's ``Publication`` date. The interest values returned
    are summed (0 when nothing is returned) and stored in a
    ``Trending_amount`` column, and the dataset file is overwritten.
    """
    dataset_path = '../../dataset-samples.csv'
    df = pd.read_csv(dataset_path)
    pytrends = TrendReq(hl='en-US', tz=360)
    trending_data = []
    for _, entry in df.iterrows():
        comp_name = entry["Company Name"].strip('\"')
        breachday, breachday_two_weeks = get_two_week_range(entry["Publication"])

        kw_list = [f"{comp_name} Breach"]
        pytrends.build_payload(kw_list, cat=0, timeframe=f'{breachday} {breachday_two_weeks}', geo='', gprop='')
        trends_data = pytrends.interest_over_time()

        if trends_data.empty:
            trending_data.append(0)
            continue
        trending_total = trends_data[kw_list[0]].sum()
        print(f"Found Total for {comp_name} - {trending_total}")
        trending_data.append(trending_total)
    df["Trending_amount"] = trending_data

    # index=False: this file is read back by read_csv above, so writing the
    # row index would add another "Unnamed: 0" column on every run.
    df.to_csv(dataset_path, index=False)
| 127 | + |
| 128 | + |
# Run only when executed as a script, so importing this module does not
# trigger network requests or rewrite the dataset file.
if __name__ == "__main__":
    main()
0 commit comments