#!/usr/bin/python
# coding:utf-8
import os
from bs4 import BeautifulSoup
import requests
import json
# Sina Finance profit-statement page for stock 002279; {year} selects the
# reporting year.
_PROFIT_URL_TEMPLATE = (
    'https://money.finance.sina.com.cn/corp/go.php/vFD_ProfitStatement'
    '/stockid/002279/ctrl/{year}/displaytype/4.phtml'
)
_PROFIT_TABLE_ID = 'ProfitStatementNewTable0'
# Seconds to wait for the server before giving up instead of hanging forever.
_REQUEST_TIMEOUT = 30


def _fetch_profit_soup(year):
    """Download and parse the profit-statement page for ``year``.

    Raises:
        requests.RequestException: on timeout, connection failure or a
            non-success HTTP status.
    """
    page = requests.get(
        _PROFIT_URL_TEMPLATE.format(year=year), timeout=_REQUEST_TIMEOUT
    )
    page.raise_for_status()
    return BeautifulSoup(page.content, 'html.parser')


def _extract_rows(tbody):
    """Return the text of every ``<td>`` in ``tbody``, one list per ``<tr>``.

    Each row's first cell is echoed to stdout, followed by cells 1-4 when
    the row has at least five cells.  Rows without any ``<td>`` cell are
    skipped because they carry neither a label nor a value.
    """
    rows = []
    for tr in tbody.find_all('tr'):
        texts = [td.text for td in tr.find_all('td')]
        if not texts:
            continue
        print(texts[0])
        if len(texts) > 4:
            print(texts[1:5])
        rows.append(texts)
    return rows


def _rows_to_quarters(array, max_quarters=4):
    """Pivot a row-major table into one record per value column.

    ``array[0]`` is the header row (label cell followed by one caption per
    column); every later row is ``[label, value, value, ...]``.  Returns a
    list of ``{'季度': caption, 'value': {label: value}}`` dicts with the
    thousands separators removed from each value.  At most ``max_quarters``
    columns are read, and fewer when the header is narrower.
    """
    if not array:
        return []
    header, data_rows = array[0], array[1:]
    quarters = []
    for i in range(min(max_quarters, len(header) - 1)):
        jd = {row[0]: row[i + 1].replace(',', '') for row in data_rows}
        # Emit the column only when there is data and every row contributed
        # a distinct label; duplicate labels would have overwritten each
        # other inside ``jd``.
        if data_rows and len(jd) == len(data_rows):
            quarters.append({'季度': header[i + 1], 'value': jd})
    return quarters


# One {'年度': year, 'value': [quarter records]} entry per scraped year.
nds = []
for yar in range(2018, 2023):
    soup = _fetch_profit_soup(yar)
    print(soup.select('div.tagmain'))
    print('----------------↓抓取数据表格------------')
    pythontable = soup.find('table', {'id': _PROFIT_TABLE_ID})
    print(pythontable)
    if pythontable is None:
        raise RuntimeError(
            'table #%s not found on the %s page' % (_PROFIT_TABLE_ID, yar)
        )
    print('---------------↓抓取tbody-------------')
    pythontable1 = pythontable.find('tbody')
    print(pythontable1)
    if pythontable1 is None:
        raise RuntimeError(
            'table #%s has no <tbody> on the %s page' % (_PROFIT_TABLE_ID, yar)
        )
    print('---------------↓抓取每一行-------------')
    array = _extract_rows(pythontable1)
    print('---------------↓打印这个table-------------')
    print(array)
    print('---------------↓table转为python类型JSON-------------')
    nd = _rows_to_quarters(array)
    nds.append({'年度': yar, 'value': nd})
    print('---------------↓输出年度JSON-------------')
    print(json.dumps(nd, ensure_ascii=False))
print('---------------↓输出全部JSON-------------')
en_json1 = json.dumps(nds, ensure_ascii=False)
print(en_json1)
# Mode 'w' truncates any earlier dump, so the file never has to be removed
# first.  The encoding is pinned to UTF-8 because the JSON keeps its
# non-ASCII characters (ensure_ascii=False) and is read back as UTF-8 below;
# relying on the locale default would corrupt it on non-UTF-8 systems.
with open('/Users/sujiewei/Desktop/mysql.txt', 'w', encoding='utf-8') as f:
    f.write(en_json1)
from pyecharts.charts import Bar,Line
import pyecharts.options as opts
import json
print('----------------↓读取json文件------------')
with open("/Users/sujiewei/Desktop/mysql.txt", "r", encoding="utf-8") as f:
    content = json.load(f)
print(content)

print('----------------↓整合营业总收入和营业总成本data------------')
# Parallel per-year series consumed by the charts further down:
# x_data holds the axis captions, zsrData the summed revenue and zcbData
# the summed cost.
x_data = []
zcbData = []
zsrData = []
for year_entry in content:
    year_cost = 0.00
    year_revenue = 0.00
    x_data.append(f'{year_entry.get("年度")}年')
    # Add up every quarter record stored under this year.
    for quarter in year_entry.get("value"):
        print(quarter)
        figures = quarter.get("value")
        print(figures)
        year_revenue += float(figures["一、营业总收入"])
        year_cost += float(figures["二、营业总成本"])
    zcbData.append(year_cost)
    zsrData.append(year_revenue)

# Yearly revenue minus yearly cost; the charts plot this series as "净利润".
v = [revenue - cost for revenue, cost in zip(zsrData, zcbData)]
print(zsrData)
print(zcbData)
print(v)
print('----------------↓生成统计图------------')
# Bar chart of yearly cost and revenue, plus a second y-axis scaled to the
# profit series so that an overlaid chart can plot against it.
bar = Bar()
bar.add_xaxis(xaxis_data=x_data)
for bar_series_name, bar_series_values in (
    ("营业总成本", zcbData),
    ("营业总收入", zsrData),
):
    bar.add_yaxis(
        series_name=bar_series_name,
        y_axis=bar_series_values,
        label_opts=opts.LabelOpts(is_show=False),
    )

# Extra y-axis whose range is pinned to the extremes of ``v``.
profit_axis = opts.AxisOpts(
    name="净利润",
    type_="value",
    min_=min(v),
    max_=max(v),
    axislabel_opts=opts.LabelOpts(formatter="{value} 万元"),
)
bar.extend_axis(yaxis=profit_axis)

# Axis-triggered tooltip with a cross-shaped pointer (a horizontal guide
# line is drawn while hovering a point).
hover_tooltip = opts.TooltipOpts(
    is_show=True, trigger="axis", axis_pointer_type="cross"
)
# Category x-axis; hovering a category highlights it with a shaded band.
category_axis = opts.AxisOpts(
    type_="category",
    axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
)
# Primary y-axis for the amounts: starts at zero, shows ticks and split
# lines, and appends the unit to every tick label.
amount_axis = opts.AxisOpts(
    name="金额",
    type_="value",
    min_=0,
    axislabel_opts=opts.LabelOpts(formatter="{value} 万元"),
    axistick_opts=opts.AxisTickOpts(is_show=True),
    splitline_opts=opts.SplitLineOpts(is_show=True),
)
bar.set_global_opts(
    tooltip_opts=hover_tooltip,
    xaxis_opts=category_axis,
    yaxis_opts=amount_axis,
)
# print(zsrData)
# Line chart with three series on the same categories as the bar chart.
# ``yaxis_index`` selects the y-axis a series is drawn against: revenue and
# cost use axis 0, the profit series uses axis 1.  Point labels are hidden
# on every series.
line = Line()
line.add_xaxis(x_data)
for line_series_name, line_axis_index, line_series_values in (
    ("营业总收入", 0, zsrData),
    ("营业总成本", 0, zcbData),
    ("净利润", 1, v),
):
    line.add_yaxis(
        series_name=line_series_name,
        yaxis_index=line_axis_index,
        y_axis=line_series_values,
        label_opts=opts.LabelOpts(is_show=False),
    )
# overlap() merges the line chart's series and legend entries into ``bar``
# in place and returns ``bar``.  It is therefore called exactly once; a
# second call would append the same line series again and duplicate them
# in whatever is rendered afterwards.
combined = bar.overlap(line)
combined.render("test1.html")
combined.render_notebook()
print("http://localhost:63343/ocr-service/test1.html")