Scikit Learn 股票投资:p24

前言

在教学视频p23中,我们筛选出了400多只可能会跑赢大盘的股票。但我们不可能把这些股票全部投资一遍,而且有些股票只跑赢大盘1%或者2%,这样的差距其实可以忽略。因此,本节视频将重置我们的training数据,提高“跑赢大盘”的标准,从而精简我们的股票。

视频

视频出处

视频系列:Scikit-learn Machine Learning with Python and SKlearn

本视频出处:Scikit Learn Machine Learning for investing Tutorial with Python p. 24

哔哩哔哩:Scikit Learn Machine Learning for investing Tutorial with Python p. 24

内容

本节视频将用到p18的代码,我们只需要把其中的判断条件 difference > 0 改为 difference > 5。这意味着,我们认为股票的涨幅要比大盘高出5%以上才算是跑赢大盘:

if difference > 5: # counts as beating the market only when ahead by more than 5%
  status = 1
else:
  status = 0

源代码

# build training set of data
import pandas as pd
import os
import time
from datetime import datetime
import re
from time import mktime
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import style
# Select matplotlib's "dark_background" style for any plots drawn later.
style.use("dark_background")


# Root data directory; Key_Stats reads the "_KeyStats" folder beneath it.
path = "../intraQuarter"

# Labels searched for in each saved key-statistics page, in output-column order.
# Kept as a tuple so the default argument of Key_Stats cannot be mutated.
_KEY_STATS_LABELS = (
  "Total Debt/Equity",
  'Trailing P/E',
  'Price/Sales',
  'Price/Book',
  'Profit Margin',
  'Operating Margin',
  'Return on Assets',
  'Return on Equity',
  'Revenue Per Share',
  'Market Cap',
  'Enterprise Value',
  'Forward P/E',
  'PEG Ratio',
  'Enterprise Value/Revenue',
  'Enterprise Value/EBITDA',
  'Revenue',
  'Gross Profit',
  'EBITDA',
  'Net Income Avl to Common ',
  'Diluted EPS',
  'Earnings Growth',
  'Revenue Growth',
  'Total Cash',
  'Total Cash Per Share',
  'Total Debt',
  'Current Ratio',
  'Book Value Per Share',
  'Cash Flow',
  'Beta',
  'Held by Insiders',
  'Held by Institutions',
  'Shares Short (as of',
  'Short Ratio',
  'Short % of Float',
  'Shares Short (prior ',
)

# Output column for each label above; only the first label is renamed.
_FEATURE_COLUMNS = ('DE Ratio',) + _KEY_STATS_LABELS[1:]

_ONE_YEAR_SECONDS = 31536000  # 365 days
_FALLBACK_SECONDS = 259200  # 3 days; second date tried when the exact day has no row
_MAX_MISSING_STATS = 15  # snapshots with more "N/A" statistics than this are dropped


def _parse_stat(pattern, source):
  """Return the figure captured by *pattern* in *source*, or "N/A" if not found.

  Figures with a "B" or "M" suffix are scaled to plain floats; any other
  capture is returned as the matched string.
  """
  match = pattern.search(source)
  if match is None:
    return "N/A"
  value = match.group(1)
  try:
    if "B" in value:
      return float(value.replace("B", '')) * 1000000000
    if "M" in value:
      return float(value.replace("M", '')) * 1000000
  except ValueError:
    # The pattern admits an "MB" suffix, which float() cannot parse.
    return "N/A"
  return value


def _lookup_by_date(frame, column, unix_time):
  """Return frame[column] for the local date of *unix_time* as a float.

  The exact date is tried first, then the date three days earlier. Returns
  None when the column is absent or neither date has a row, so the caller
  never ends up reusing a value left over from a previous lookup.
  """
  if column not in frame.columns:
    return None
  for offset in (0, _FALLBACK_SECONDS):
    day = datetime.fromtimestamp(unix_time - offset).strftime('%Y-%m-%d')
    matches = frame.loc[frame.index == day, column]
    if len(matches) > 0:
      return float(matches.iloc[0])
  return None


def _build_row(ticker, date_stamp, unix_time, source, patterns,
               sp500_df, stock_df, outperform_threshold):
  """Return the output row (a dict) for one snapshot, or None to drop it."""
  value_list = [_parse_stat(pattern, source) for pattern in patterns]
  if value_list.count("N/A") > _MAX_MISSING_STATS:
    return None

  one_year_later = int(unix_time + _ONE_YEAR_SECONDS)
  price_column = ticker.upper()
  lookups = (
    ("S&P 500", _lookup_by_date(sp500_df, "Adj Close", unix_time)),
    ("S&P 500 1 year later", _lookup_by_date(sp500_df, "Adj Close", one_year_later)),
    ("stock price", _lookup_by_date(stock_df, price_column, unix_time)),
    ("stock price 1 year later", _lookup_by_date(stock_df, price_column, one_year_later)),
  )
  missing = [name for name, found in lookups if found is None]
  if missing:
    # Without all four prices the performance cannot be computed for this
    # snapshot, so it is skipped rather than filled with unrelated values.
    print("skipping", ticker, date_stamp, "- no data for:", ", ".join(missing))
    return None

  sp500_value, sp500_1y_value, stock_price, stock_1y_value = [
    found for _, found in lookups
  ]
  stock_price = round(stock_price, 2)
  stock_1y_value = round(stock_1y_value, 2)

  stock_p_change = round((((stock_1y_value - stock_price) / stock_price) * 100), 2)
  sp500_p_change = round((((sp500_1y_value - sp500_value) / sp500_value) * 100), 2)
  difference = stock_p_change - sp500_p_change

  row = {
    'Date': date_stamp,
    'Unix': unix_time,
    'Ticker': ticker,
    'Price': stock_price,
    'stock_p_change': stock_p_change,
    'SP500': sp500_value,
    'sp500_p_change': sp500_p_change,
    'Difference': difference,
  }
  row.update(zip(_FEATURE_COLUMNS, value_list))
  row['Status'] = 1 if difference > outperform_threshold else 0
  return row


def Key_Stats(gather=_KEY_STATS_LABELS, outperform_threshold=5):
  """Build the labelled training set and write it to a CSV file.

  Every sub-directory under ``path + '/_KeyStats'`` is treated as one ticker,
  and every ``YYYYmmddHHMMSS.html`` file inside it as one snapshot. For each
  snapshot the statistics named in *gather* are extracted with a regular
  expression, the stock's and the S&P 500's percentage change over the
  following 365 days is computed, and the snapshot is labelled.

  Args:
    gather: labels to search for in each page. Values are written
      positionally to the feature columns ('DE Ratio', 'Trailing P/E', ...).
    outperform_threshold: 'Status' is 1 when the stock's percentage change
      exceeds the S&P 500's by more than this many points, otherwise 0.

  Reads "SPY.csv" and "stock_prices.csv" from the current working directory
  and writes "key_stats_acc_perf_WITH_NA_enhanced.csv" there. Snapshots with
  more than 15 missing statistics, or without the prices needed to compute
  the performance, are left out. Returns None.
  """
  statspath = path+'/_KeyStats'
  # os.walk yields the root directory first; every later entry is a ticker folder.
  stock_dirs = [x[0] for x in os.walk(statspath)][1:]

  columns = [
    'Date',
    'Unix',
    'Ticker',
    'Price',
    'stock_p_change',
    'SP500',
    'sp500_p_change',
    'Difference',
  ] + list(_FEATURE_COLUMNS) + ['Status']

  # read_csv with these arguments replaces the removed DataFrame.from_csv.
  sp500_df = pd.read_csv("SPY.csv", index_col=0, parse_dates=True)
  stock_df = pd.read_csv("stock_prices.csv", index_col=0, parse_dates=True)

  # Compile each pattern once instead of once per file.
  patterns = [
    re.compile(re.escape(each_data) + r'.*?(\d{1,8}\.\d{1,8}M?B?|N/A)%?</td>')
    for each_data in gather
  ]

  rows = []
  for each_dir in stock_dirs:
    # basename works with both "/" and "\\" separators.
    ticker = os.path.basename(os.path.normpath(each_dir))

    for file_name in os.listdir(each_dir):
      full_file_path = each_dir+'/'+file_name
      try:
        date_stamp = datetime.strptime(file_name, '%Y%m%d%H%M%S.html')
      except ValueError:
        # Not a snapshot (e.g. a hidden OS file or a nested folder).
        print("skipping file with unexpected name:", full_file_path)
        continue
      unix_time = time.mktime(date_stamp.timetuple())

      with open(full_file_path, 'r') as handle:
        source = handle.read()

      try:
        row = _build_row(ticker, date_stamp, unix_time, source, patterns,
                         sp500_df, stock_df, outperform_threshold)
      except (ArithmeticError, TypeError, ValueError, OSError) as e:
        # Best effort: one bad snapshot (zero price, non-numeric cell, ...)
        # must not abort the run, but it is reported instead of hidden.
        print("skipping", full_file_path, "-", repr(e))
        continue
      if row is not None:
        rows.append(row)

  # Build the frame once; DataFrame.append was removed and was quadratic.
  df = pd.DataFrame(rows, columns=columns)
  df.to_csv("key_stats_acc_perf_WITH_NA_enhanced.csv")

# Build the training set and write the CSV as soon as this script is executed.
Key_Stats()

最后

虽然柯西君_BingWong只是一名搬运工,连码农都称不上,但制作代码中的注释、翻译和搬运都花了很多时间,请各位大侠高抬贵手,在转载时注明出处。

阅读量: | 柯西君_BingWong | 2017-09-05