经典题库错题数

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import sys
sys.path.append(r'e:\Desktop\新环境\DataMachine_for_timer\src')
import pandas as pd
import numpy as np
import pymysql
from ML import data_suit,plot_suit
from utils import mysqlhelper,elapse,prestohelper
from config import config
from datetime import datetime , time
from pyhive import presto

## 脱敏库
mysqlinstance_tm_new_hfjy = mysqlhelper(**config.tm_new_hfjy)
mysqlinstance_tm_new_bidata = mysqlhelper(**config.tm_new_bidata)

## dmart库
mysqlinstance_dmart_dmart = mysqlhelper(**config.dmart_dmart)
mysqlinstance_dmart_bidata = mysqlhelper(**config.dmart_bidata)

## hrm库
mysqlinstance_hrm_hrm_sale = mysqlhelper(**config.hrm_hrm_sale)
mysqlinstance_hrm_new_hrm = mysqlhelper(**config.hrm_new_hrm)
mysqlinstance_hrm_jobhunt = mysqlhelper(**config.hrm_jobhunt)

## hive库
prestoinstance = prestohelper(**config.hive_prosto)


# In[2]:


## 作业中错题统计

sql1 = '''
SELECT
        hw.quiz_id,
        count(hw.quiz_id),
        sb.subject_name
FROM
        ods_db_hfjydb.home_work_detail_info hw
LEFT JOIN
        ods_db_hfjydb.lesson_plan_quiz lpq ON lpq.quiz_id = hw.quiz_id
LEFT JOIN
        ods_db_hfjydb.lesson_plan lp ON lp.lesson_plan_id = lpq.lesson_plan_id
LEFT JOIN
        ods_db_hfjydb.subject sb ON sb.subject_id = lp.subject_id
WHERE
        hw.type = 1
        AND lpq.type in (1,2)
        AND hw.status = 0
GROUP BY
        hw.quiz_id,sb.subject_name
'''

df1 = prestoinstance.get_df(sql1)

df1.head()


# In[3]:


df_1 = df1.rename(columns={'quiz_id':'题目id','_col1':'作业中该题错题次数','subject_name':'学科学段'})

df_1.head()


# In[4]:


df_1.dtypes


# In[5]:


## 课堂中错题统计

sql2 = '''
SELECT
        lpqs.quiz_id '题目id',
        sb.subject_name '学科学段',
        count(lpqs.lesson_plan_id) '课堂中该题错题次数'

FROM
        lesson_plan_quiz_status lpqs
LEFT JOIN
        lesson_plan lp ON lp.lesson_plan_id = lpqs.lesson_plan_id
LEFT JOIN
        subject sb ON sb.subject_id = lp.subject_id
WHERE
        lpqs.correct_status = 0
GROUP BY
        lpqs.quiz_id
'''

df2 = mysqlinstance_tm_new_hfjy.get_df(sql2)

df2.head()


# In[6]:


df2.dtypes


# In[7]:


df_sub = pd.merge(df_1,df2,how='left',on=['题目id','学科学段'])

df_sub.head()


# In[8]:


## 试卷中错题统计

sql3 = '''
SELECT
        stpq.quiz_id '题目id',
        count(stpq.id) '试卷中该题错题次数'
FROM
        student_test_paper_quiz stpq
GROUP BY
        stpq.quiz_id
'''

df3 = mysqlinstance_tm_new_hfjy.get_df(sql3)

df3.head()


# In[10]:


df_sub = pd.merge(df_sub,df3,how='left',on=['题目id'])

df_sub.head()


# In[13]:


temp = df_sub[['作业中该题错题次数','课堂中该题错题次数','试卷中该题错题次数']]


# In[14]:


df_sub['总数'] = temp.sum(axis=1)


# In[15]:


df_sub


# In[21]:


df_sub.drop(['作业中该题错题次数','课堂中该题错题次数','试卷中该题错题次数'],axis=1,inplace=True)


# In[22]:


df_sub.to_excel(r'e:\Desktop\经典错题总数.xlsx',index=False,encoding='GB18030')

文档更新时间: 2019-08-19 11:37   作者:admin