整份讲义占比

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import pymysql
from pyhive import presto


# In[2]:


conn = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")

conn1 = presto.connect(host='dw-bigdata.hfjy.red', port='8334')


# In[3]:


sql1 = '''
SELECT
        max(b.subject_name),
        max(b.lesson_type),
        count(b.lesson_plan_id) count_plan
FROM
(
SELECT
        max(a.subject_name) subject_name,
        max(a.lesson_type) lesson_type,
        max(a.lesson_plan_id) lesson_plan_id,
        count(a.lesson_plan_id) is_repeat
FROM
        (
        SELECT
                max(sb.subject_name) subject_name,
                (case lp.lesson_type when 1 then '正式课' when 2 then '试听课' else '' end) lesson_type,
                lp.lesson_plan_id,
                lpqh.paperid,
                if(count(lpq.quiz_id) = count(lpqh.queid),1,0) lesson_flag,
                count(lpq.quiz_id) count_quiz1,
                count(lpqh.queid) count_quiz2
        FROM
                ods_hfjydb.lesson_plan lp
        LEFT JOIN
                ods_hfjydb.lesson_plan_quiz_total lpq ON lpq.lesson_plan_id = lp.lesson_plan_id
        LEFT JOIN
                ods_hfjydb.lesson_plan_quiz_hwlinfo lpqh ON lpqh.lessonplanid = lp.lesson_plan_id and lpqh.quizid = lpq.quiz_id
        LEFT JOIN
                ods_hfjydb.subject sb ON sb.subject_id = lp.subject_id
        WHERE
                lp.adjust_start_time >= '2019-05-01'
                AND lp.adjust_start_time < '2019-06-01'
                AND lp.solve_status <> 6
                AND lp.status in (3,5)
                AND lp.lesson_type in (1,2)
        GROUP BY
                sb.subject_name,lp.lesson_type,lp.lesson_plan_id,lpqh.paperid
        ) a
WHERE
        a.lesson_flag = 1
GROUP BY
        a.lesson_plan_id
) b
WHERE
        b.is_repeat <= 1
GROUP BY
        b.subject_name,b.lesson_type
'''

df1 = pd.read_sql(sql1,conn1)

df1.head()


# In[11]:


sql2 = '''
SELECT
        sb.subject_name,
        (case lp.lesson_type when 1 then '正式课' when 2 then '试听课' else '' end) lesson_type,
        count(lp.lesson_plan_id) count_all
FROM
        ods_hfjydb.lesson_plan lp
LEFT JOIN
        ods_hfjydb.subject sb ON sb.subject_id = lp.subject_id
WHERE
        lp.adjust_start_time >= '2019-05-01'
        AND lp.adjust_start_time < '2019-06-01'
        AND lp.solve_status <> 6
        AND lp.status in (3,5)
        AND lp.lesson_type in (1,2)
GROUP BY
        sb.subject_name,lp.lesson_type
'''
df2 = pd.read_sql(sql2,conn1)

df2.head()


# In[9]:


df1.rename(columns={'max(b.subject_name)': 'subject_name','max(b.lesson_type)':'lesson_type'}, inplace=True)

df1.head()


# In[10]:


df_sub = pd.merge(df1,df2,how='left',on=['subject_name','lesson_type'])

df_sub.head()


# In[7]:


df_sub.to_excel(r'E:/Desktop/6-25-整份讲义比例.xlsx')


# In[12]:


df1.to_excel(r'E:/Desktop/1111.xlsx')

布置作业变化

#!/usr/bin/env python
# coding: utf-8

# In[8]:


import pandas as pd
import pymysql
from pyhive import presto


# In[9]:


conn = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")

conn1 = presto.connect(host='dw-bigdata.hfjy.red', port='8334')


# In[7]:


sql1 = '''
SELECT
        sb.subject_name,
        count(quiz_id) 
FROM
        home_work_detail_info hwd
LEFT JOIN
        home_work hw ON hw.home_work_id  = hwd.home_work_id
LEFT JOIN
        lesson_plan lp ON  hw.lesson_plan_id=lp.lesson_plan_id
LEFT JOIN
        subject sb ON sb.subject_id = lp.subject_id
WHERE
        lp.adjust_start_time >= '2019-05-01'
        AND lp.adjust_start_time < '2019-06-01'
GROUP BY
        sb.subject_name
'''

df1 = pd.read_sql(sql1,conn)

df1.head()


# In[ ]:


sql2 = '''
SELECT
        sb.subject_name,
        count(hwd.quiz_id) 
FROM
        home_work_detail_info hwd
LEFT JOIN
        home_work hw ON hw.home_work_id  = hwd.home_work_id
LEFT JOIN
        lesson_plan lp ON  hw.lesson_plan_id=lp.lesson_plan_id
LEFT JOIN
        quiz qz ON qz.quiz_id = hwd.quiz_id
LEFT JOIN
        subject sb ON sb.subject_id = lp.subject_id
WHERE
        lp.adjust_start_time >= '2019-05-01'
        AND lp.adjust_start_time < '2019-06-01'
        AND qz.category = 2
GROUP BY
        sb.subject_name
'''

df2 = pd.read_sql(sql2,conn)

df2.head()


# In[ ]:


df_sub = pd.merge(df1,df2,how='left',on=['subject_name'])

df_sub.head()


# In[ ]:


df_sub.rename(columns={'subject_name':'学科学段','count(hwd.quiz_id)_x':'题目总数','count(hwd.quiz_id)_y':'客观题总数'},inplace=True)

df_sub.head()


# In[ ]:


df_sub


# In[ ]:


df_sub.to_excel(r'E:/Desktop/6-24-5月一整月作业.xlsx')



文档更新时间: 2019-08-19 11:43   作者:admin