Scikit Learn 股票投资:p22

前言

在上一节教程中,我们从yahoo上获得了股票最新的基本面数据。 本节视频将讲述如何应用新的数据。

视频

视频出处

视频系列:Scikit-learn Machine Learning with Python and SKlearn

本视频出处:Scikit Learn Machine Learning for investing Tutorial with Python p. 22

哔哩哔哩:Scikit Learn Machine Learning for investing Tutorial with Python p. 22

内容

我们只是回测更新后的数据,所以本次教程的代码是基于p18的代码修改而来的。作者修改了function的名称和文件目录的路径,并删除了一些不必要的代码。其余代码与之前基本一样,这里我就不再作注释了。

源代码

import pandas as pd
import os
import time
from datetime import datetime
import re
from time import mktime
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import style
# Select matplotlib's "dark_background" style sheet.
style.use("dark_background")

# Root folder of the downloaded data; Forward() reads the pages stored
# under the "forward" sub-folder of this path.
path = "../intraQuarter"

# Labels searched for in every saved page, in output-column order.
_GATHER_LABELS = (
  "Total Debt/Equity",
  'Trailing P/E',
  'Price/Sales',
  'Price/Book',
  'Profit Margin',
  'Operating Margin',
  'Return on Assets',
  'Return on Equity',
  'Revenue Per Share',
  'Market Cap',
  'Enterprise Value',
  'Forward P/E',
  'PEG Ratio',
  'Enterprise Value/Revenue',
  'Enterprise Value/EBITDA',
  'Revenue',
  'Gross Profit',
  'EBITDA',
  'Net Income Avl to Common ',
  'Diluted EPS',
  'Earnings Growth',
  'Revenue Growth',
  'Total Cash',
  'Total Cash Per Share',
  'Total Debt',
  'Current Ratio',
  'Book Value Per Share',
  'Cash Flow',
  'Beta',
  'Held by Insiders',
  'Held by Institutions',
  'Shares Short (as of',
  'Short Ratio',
  'Short % of Float',
  'Shares Short (prior ',
)

# CSV column that receives the value of the label at the same position;
# only the first label is stored under a different name.
_FEATURE_COLUMNS = ('DE Ratio',) + _GATHER_LABELS[1:]

# Bookkeeping columns written before and after the feature columns.
_LEADING_COLUMNS = (
  'Date',
  'Unix',
  'Ticker',
  'Price',
  'stock_p_change',
  'SP500',
  'sp500_p_change',
  'Difference',
)
_TRAILING_COLUMNS = ('Status',)


def _extract_value(pattern, source):
  """Return the first value `pattern` captures in `source`, or "N/A".

  Captures containing "B" or "M" are converted to a float scaled by 1e9
  or 1e6 respectively; any other capture is returned as the matched text.
  """
  match = pattern.search(source)
  if match is None:
    return "N/A"
  text = match.group(1)
  try:
    if "B" in text:
      return float(text.replace("B", '')) * 1000000000
    if "M" in text:
      return float(text.replace("M", '')) * 1000000
  except ValueError:
    # The pattern also admits captures such as "1.5MB" that float() rejects;
    # they are reported as missing rather than aborting the whole file.
    return "N/A"
  return text


def Forward(
    gather=_GATHER_LABELS,
    data_dir=None,
    output_csv="forward_sample_WITH_NA.csv",
    max_missing=15,
  ):
  """Scrape the saved pages in `data_dir` into one CSV row per ticker.

  Args:
    gather: Label strings to look for in each page. The value found for
      the i-th label is stored in the i-th feature column ('DE Ratio',
      'Trailing P/E', ...). Feature columns without a corresponding label
      are written as "N/A"; labels beyond the last feature column only
      count towards `max_missing`.
    data_dir: Directory holding the "<ticker>.html" files. Defaults to
      the "forward" folder below the module-level `path`.
    output_csv: File the resulting table is written to.
    max_missing: A ticker is skipped when more than this many of its
      values are "N/A". `max_missing=0` keeps only complete rows.

  Returns:
    None. The table is written to `output_csv`.
  """
  if data_dir is None:
    data_dir = path+"/forward"

  # One compiled pattern per label, built once instead of once per file.
  patterns = [
    re.compile(re.escape(label) + r'.*?(\d{1,8}\.\d{1,8}M?B?|N/A)%?</td>')
    for label in gather
  ]
  columns = list(_LEADING_COLUMNS + _FEATURE_COLUMNS + _TRAILING_COLUMNS)

  rows = []
  # os.listdir() returns entries in arbitrary order, so dropping "the first
  # one" can discard a real ticker. Select the pages by extension instead
  # and sort them so that the output is reproducible.
  for each_file in sorted(os.listdir(data_dir)):
    if not each_file.endswith(".html"):
      continue
    ticker = each_file.split(".html")[0]
    with open(os.path.join(data_dir, each_file), 'r') as page:
      source = page.read()

    values = [_extract_value(pattern, source) for pattern in patterns]
    if values.count("N/A") > max_missing:
      continue

    # Price/date/status information is not available for these pages, so
    # every column starts out as "N/A" and only the known ones are filled.
    row = dict.fromkeys(columns, "N/A")
    row['Ticker'] = ticker
    row.update(zip(_FEATURE_COLUMNS, values))
    rows.append(row)

  # Build the frame in one go: DataFrame.append no longer exists in
  # pandas >= 2.0, and appending row by row copies the frame every time.
  df = pd.DataFrame(rows, columns=columns)
  df.to_csv(output_csv)

# Run the extraction with its default arguments when the script is executed.
Forward()

最后

虽然柯西君_BingWong只是一名搬运工,连码农都称不上,但代码中的注释、翻译和搬运都花了很多时间,请各位大侠高抬贵手,在转载时注明出处。

阅读量: | 柯西君_BingWong | 2017-09-05