教研题目使用率

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import pymysql
from pyhive import presto

conn_hive = presto.connect(host='dw-bigdata.hfjy.red', port='8334')


# In[31]:


sql1 = '''
SELECT
    a.lesson_plan_id,
    max(a.lesson_type) lesson_type,
    max(a.adjust_start_time) adjust_start_time,
    max(a.subject_name) subject_name,
    count(case when (a.stay_time >= 10 AND a.subject_two='小学') then a.quiz_id when (a.stay_time >= 30 AND (a.subject_two='初中' OR a.subject_two='高中')) then a.quiz_id  end) quiz_count,
    count(case when (a.type in (1,3,4) AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.type in (1,3,4) AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hf_quiz_count,
    count(case when (a.type = 2 AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.type = 2 AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  homework_quiz_count,
    count(case when (a.type in (8,9) AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.type in (8,9) AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hwl_quiz_count,
    count(case when (a.type = 6 AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.type = 6 AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  zdy_quiz_count,
    count(case when (a.hwl_type='严选智能题库' AND a.type in (8,9) AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.hwl_type='严选智能题库' AND a.type in (8,9) AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hwl_zn_count,
    count(case when (a.hwl_type='严选讲义' AND a.type in (8,9) AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.hwl_type='严选讲义' AND a.type in (8,9) AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hwl_jy_count,
    count(case when (a.homework_type='作业中严选智能题库' AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.homework_type='作业中严选智能题库' AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hw_zn_count,
    count(case when (a.homework_type='作业中精品讲义题目' AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.homework_type='作业中精品讲义题目' AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hw_jy_count,
    count(case when (a.homework_type='作业中自定义的题目' AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.homework_type='作业中自定义的题目' AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hw_zdy_count,
    count(case when (a.homework_type='作业中经典的题目' AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.homework_type='作业中经典的题目' AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hw_jd_count,
    count(case when (a.homework_type='作业中的作业题目数' AND a.subject_two='小学' AND a.stay_time >= 10) THEN a.quiz_id WHEN (a.homework_type='作业中的作业题目数' AND (a.subject_two='初中' OR a.subject_two='高中')  AND a.stay_time >= 30 ) THEN a.quiz_id end)  hw_hw_count
FROM
(
    SELECT
        lp.lesson_plan_id,
        lp.lesson_type,
        lp.adjust_start_time,
        sub.subject_name,
        SUBSTR(sub.subject_name,1,2) subject_two,
        lpq.quiz_id,
        lpq.type,
        (case when jh.id is null and lpq.type in (8,9) then '严选智能题库' when jh.id > 0 then '严选讲义' else '其他' end) hwl_type,
        (case when lpq.type = 2 and hwdi.type = 1 AND hwdih.homeworkid IS NULL then '作业中经典的题目' when lpq.type = 2 and hwdi.type = 6 AND hwdih.homeworkid IS NULL then '作业中自定义的题目' when lpq.type = 2 and hwdi.type in (8,9) and hwdih.homeworkid IS NULL then '作业中严选智能题库' when lpq.type = 2 and hwdi.type = 8 and hwdih.homeworkid IS NOT NULL then '作业中精品讲义题目' WHEN lpq.type = 2 AND hwdi.type=2 AND hwdih.homeworkid IS NULL THEN '作业中的作业题目数' else '其他'end ) homework_type,  
        quiz.stay_time
    FROM
        dwd_db_hfjydb.lesson_plan lp
    INNER JOIN
        dwd_db_hfjydb.subject sub on  sub.subject_id = lp.subject_id
    INNER JOIN
        dwd_db_hfjydb.lesson_plan_quiz_total lpq on lpq.lesson_plan_id = lp.lesson_plan_id
    LEFT JOIN
        dwd_db_hfjydb.lesson_plan_quiz_hwlinfo lpqh on lpqh.lessonplanid = lp.lesson_plan_id and lpqh.quizid = lpq.quiz_id
    LEFT JOIN
        dwd_db_hfjydb.jx_handouts jh ON jh.paper_id = lpqh.paperid and jh.is_del = 0
    LEFT JOIN
        dwd_db_hfjydb.home_work_detail_info hwdi ON hwdi.Id = lpq.quiz_id and lpq.type = 2
    LEFT JOIN
        dwd_db_hfjydb.home_work_detail_info_hwlinfo hwdih ON hwdih.homeworkid = hwdi.home_work_id and hwdi.quiz_id = hwdih.quizid
    LEFT JOIN
        (select ilrqi.lesson_plan_id,ilrqi.quiz_id,ilrqi.stay_time from edw.ls_learning_record_quiz_info ilrqi where ilrqi.real_start_time >= '2019-07-01' and ilrqi.real_start_time < '2019-08-01') quiz
        ON quiz.lesson_plan_id= lpq.lesson_plan_id and lpq.quiz_id = cast (quiz.quiz_id as int)
    WHERE lp.status in (3,5)
            AND lp.solve_status <> 6
            AND lp.lesson_type in (1,2)
            AND lp.adjust_start_time >= '2019-07-01'
            AND lp.adjust_start_time < '2019-08-01'
) a
GROUP BY
        a.lesson_plan_id
'''

df1 = pd.read_sql(sql1,conn_hive)

df1.head()


# In[32]:


df1.shape


# In[40]:


df1.dtypes


# In[33]:


df1.rename(columns={'lesson_plan_id':'课程id','lesson_type':'课程类型','adjust_start_time':'上课时间','subject_name':'学科','quiz_count':'题目总数','hf_quiz_count':'经典题目总数','homework_quiz_count':'作业题目总数','hwl_quiz_count':'好未来题目总数','zdy_quiz_count':'自定义题目总数','hwl_zn_count':'严选智能题库数','hwl_jy_count':'严选讲义','hw_zn_count':'作业中严选智能题库数','hw_zdy_count':'作业中自定义的题目数','hw_jy_count':'作业中精品讲义题目数','hw_jd_count':'作业中经典的题目','hw_hw_count':'作业中的作业题目数'},inplace=True)

df1.head(10)


# In[34]:


df1[df1['作业题目总数'] == (df1['作业中严选智能题库数']+df1['作业中精品讲义题目数']+df1['作业中自定义的题目数']+df1['作业中经典的题目']+df1['作业中的作业题目数'])].loc[:,['课程id','作业题目总数','作业中严选智能题库数','作业中精品讲义题目数','作业中自定义的题目数','作业中经典的题目','作业中的作业题目数']]


# In[37]:


df1[df1['好未来题目总数'] == (df1['严选智能题库数']+df1['严选讲义'])].loc[:,['课程id','好未来题目总数','严选智能题库数','严选讲义']]


# In[38]:


df1[df1['题目总数'] == (df1['经典题目总数']+df1['作业题目总数']+df1['好未来题目总数']+df1['自定义题目总数'])]


# In[39]:


df1.to_csv(r"C:\Users\hp\Desktop\题目使用率V4.csv",encoding="utf_8_sig")


# In[ ]:




文档更新时间: 2019-08-20 20:26   作者:admin