动态课件使用情况

SELECT
        lp.lesson_plan_id as '课程id',
        (select subject_name from subject where lp.subject_id = subject_id ) as '上课学科',
        t.teacher_name as '老师姓名',
        (select name from view_user_info where user_id = lp.student_id) as '学生姓名',
        t.teacher_no as '老师编号',
        s.student_no as '学生编号',
        t.teaching_department as '教学部',
        t.teaching_group as '教学组',
        t.legion_name as '教学团',
        t.attribution_user as '归属培训师',
        lp.adjust_start_time as '上课时间',
        (case when (select count(contract_id) from view_tms_contract tc 
       where tc.student_intention_id = s.student_intention_id 
       and status in (3,4,5,9) and submit_time>lp.adjust_start_time)>0 then '是' 
       else '否' end) as '是否成单',
      lpdt.coursewareName as '课件名称',
      (case when (select count(lpdc.lessonPlanId) from lesson_plan_dynamic_courseware lpdc
             WHERE lpdc.lessonPlanId = lp.lesson_plan_id)>0 then '是' else '否' end) as '是否使用动态课件'
FROM
        lesson_plan lp
LEFT JOIN
        view_student s on s.student_id = lp.student_id
LEFT JOIN
        bidata.zm_teacher_info t on t.teacher_id = lp.teacher_id
LEFT JOIN
        lesson_plan_dynamic_courseware lpdt on lpdt.lessonPlanId = lp.lesson_plan_id
WHERE
        lp.lesson_type = 2 
        and lp.status in (3,5)
        and lp.solve_status <> 6
        and lp.adjust_start_time >= '2019-07-10'
        and lp.adjust_start_time < '2019-07-23'
        and s.account_type = 1
        and t.account_type = '正式'
GROUP BY
        lp.lesson_plan_id

进步维度

#!/usr/bin/env python
# coding: utf-8

# In[116]:


import pandas as pd
import pymysql
from pyhive import presto

conn_mysql = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com",port=3306,user="yanyanglong_read",passwd="jZYhtZNwD6kWbCiJlNBI",db="hfjydb",charset="utf8")
conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')


# In[3]:


#### 先查询出所有满足条件的学生

sql1 = '''
SELECT
        a.student_id,
        a.消课数
FROM
(
SELECT
        s.student_id,
        lp.subject_id,
        count(lp.lesson_plan_id) as '消课数'
FROM
        tms_exam te
LEFT JOIN
        view_student s on s.student_intention_id = te.student_intention_id
LEFT JOIN
        lesson_plan lp on lp.student_id = s.student_id
WHERE
        te.create_time >= '2019-01-01'
        and te.progress is not null
        and lp.lesson_type = 1 
        and lp.status in (3,5)
        and lp.solve_status <> 6
GROUP BY
        s.student_id,lp.subject_id
) a
WHERE
        a.消课数 >= 20
GROUP BY
        a.student_id
'''

df1 = pd.read_sql(sql1,conn_mysql)

df1.head()


# In[3]:


#df1.to_excel(r'e:\desktop\7-23学生数据.xlsx')


# In[4]:


#### 教学开始时学生基础:
#### 学员刚进入海风时课堂10道题 准确率 平均难度系数 这10道题的平均准确率
sql2 = '''
SELECT
        a.*
FROM
(
SELECT
        lpqs.student_id,
        lpqs.correct_status,
        lpqs.quiz_id,
        vhwlqk.difficultfactor,
        lpqs.update_time,
        vhwlqk.quiztempid,
        equi.correct_rate,
        row_number() over(partition by lpqs.student_id order by lpqs.update_time) rank_num
FROM
        dwd_db_hfjydb.lesson_plan_quiz_status lpqs
LEFT JOIN
        dwd_db_hfjydb.view_hwl_quiz_knowledge vhwlqk on vhwlqk.quiztempid = lpqs.quiz_id
LEFT JOIN
        edw.quiz_use_info equi on equi.quiz_id = vhwlqk.hwlquizid
WHERE
        lpqs.student_id in {0}
) a

WHERE
        a.rank_num < 11
'''.format(tuple(df1['student_id']))

df2 = pd.read_sql(sql2,conn_hive)

df2.head()


# In[56]:


dft3 = pd.merge(pd.DataFrame(df2.groupby(['student_id'])['student_id'].count()).rename(columns={'student_id':'allsum'}).reset_index(),pd.DataFrame(df2[df2['correct_status']==1].groupby(['student_id'])['correct_status'].count()).reset_index().rename(columns={'correct_status':'=1sum'}),on='student_id',how='left')

dft3.head()


# In[58]:


#### 课堂题目正确率

dft3['课堂题目正确率'] = dft3['=1sum']/dft3['allsum']
dft3['课堂题目正确率'] = dft3['课堂题目正确率'].apply(lambda x: format(x, '.2%'))
dft3.head()


# In[59]:


#### 平均难度系数

df4 = (df2.groupby(['student_id'])['difficultfactor'].sum()/df2.groupby(['student_id'])['difficultfactor'].count()).reset_index()

df4.head()


# In[60]:


#### 平均准确率

df5 = (df2.groupby(['student_id'])['correct_rate'].sum()/ df2.groupby(['student_id'])['correct_rate'].count()).reset_index()

df5.head()


# In[61]:


df_sub = pd.merge(dft3,df4,how='left',on=['student_id'])
df_sub = pd.merge(df_sub,df5,how='left',on=['student_id'])
df_sub.head()


# In[62]:


df_sub.rename(columns={'difficultfactor':'平均难度系数','correct_rate':'平均准确率'},inplace=True)
df_sub.drop(['allsum','=1sum'],axis=1,inplace=True)
df_sub.head()


# In[ ]:





# In[ ]:





# In[ ]:





# In[65]:


#### 作业明细

sql6 = '''
SELECT
        a.student_id,
        a.home_work_id,
        a.hw_status,
        hwdi.status,
        hwdi.difficult_factor,
        a.rank_num
FROM
        (

        SELECT
                hw.student_id,
                hw.home_work_id,
                hw.status hw_status,
                row_number() over(partition by hw.student_id order by hw.start_time) rank_num
        FROM
                dwd_db_hfjydb.home_work hw
        WHERE
                hw.student_id in {0}
        ) a
LEFT JOIN
        dwd_db_hfjydb.home_work_detail_info hwdi on hwdi.home_work_id = a.home_work_id
WHERE
        a.rank_num < 6
'''.format(tuple(df1['student_id']))

df6 = pd.read_sql(sql6,conn_hive)

df6.head()


# In[120]:


#(df6[df6['hw_status']==1]) | (df6[df6['hw_status']==2])
df6.loc[df6['hw_status']==1]


# In[124]:


df6_1 = df6.loc[(df6['hw_status']==1) | (df6['hw_status']==2),:]
df6_1.head()


# In[125]:


#### 作业完成率

df8 = (df6_1.groupby(['student_id'])['home_work_id'].count()/df6.groupby(['student_id'])['home_work_id'].
       count()).reset_index()
df8['home_work_id'] = df8['home_work_id'].apply(lambda x: format(x, '.2%'))
df8.head()


# In[127]:


df8.rename(columns={'home_work_id':'作业完成率'},inplace=True)
df8.head()


# In[89]:


#### 作业题目正确率

df9 = (df6[df6['status']==1].groupby(['student_id'])['status'].count()/df6.groupby(['student_id'])['student_id'].count()).reset_index()
df9.rename(columns={0:'作业题目正确率'},inplace=True)
df9.head()


# In[90]:


df9['作业题目正确率'] = df9['作业题目正确率'].apply(lambda x: format(x, '.2%'))
df9.head()


# In[107]:


#### 作业平均难度系数

df11 = ((df6.groupby(['student_id'])['difficult_factor'].sum())/(df6[df6['difficult_factor'] != 0].groupby(['student_id'])['difficult_factor']
                                                                 .count())).reset_index()
df11.rename(columns={'difficult_factor':'作业平均难度系数'},inplace=True)
df11.head()


# In[128]:


df_sub2 = pd.merge(df8,df9,how='left',on=['student_id'])
df_sub2 = pd.merge(df_sub2,df11,how='left',on=['student_id'])
df_sub2.head()


# In[129]:


df_sub3 = pd.merge(df_sub,df_sub2,how='left',on=['student_id'])
df_sub3.head()


# In[130]:


df_sub3.to_excel(r'e:\desktop\教学刚开始维度数据V4.xlsx')


# In[131]:


df6.loc[df6['student_id']==1371582]

文档更新时间: 2019-08-19 11:54   作者:admin