拉取题目纠错

#!/usr/bin/env python
# coding: utf-8

# In[4]:


import sys
sys.path.append(r'e:\Desktop\新环境\DataMachine_for_timers\src')

from utils import mysqlhelper,emailhelper
from config import config
import re
import pandas as pd

mysqlinstance_tm_new_hfjy = mysqlhelper(**config.tm_new_hfjy)
mysqlinstance_tm_new_bidata = mysqlhelper(**config.tm_new_bidata)
mysqlinstance_dmart_bidata = mysqlhelper(**config.dmart_bidata)
mysqlinstance_hrm_audit = mysqlhelper(**config.hrm_audit)


# In[5]:


def get_data(start_date,end_date):
        sql1 = '''
               SELECT 
                DISTINCT
                jh.id 讲义ID,
                hqe.createtime 日期,
                hqe.queid 题目ID,
                hqe.paperid paperID,
                jh.handouts_name 讲义名称,
                jh.first_online_time 首次上线时间,
                sub.subject_name 科目,
                hqe.errortype 错误类型,
                hqe.description 错误详情,
                hqe.teacherid 教师ID,
                vt.teacher_no 教师编号,
                vui.name 教师NAME,
                case hqk.contenttype  when 4 then 'diy' when 5 then 'hwl题库' end as 题目添加类型,
                (select gd.`value` from bidata.gen_dict gd where gd.type='paper_status' and gd.code=jh.`status`) 讲义状态,
                (select dd.value from tms_ddic dd where jd.grade_id=dd.code and dd.type='JY002') 年级,
                (select dd.value from tms_ddic dd where jd.version_id=dd.code and dd.type='JY001') 版本,
                    jh.principal 讲义负责人id,
                vui1.name 讲义负责人,
                (select gd.`value` from bidata.gen_dict gd where gd.type='course_type' and gd.code=jd.course_type)课程类型,     
                (select gd.`value` from bidata.gen_dict gd where gd.type='course_difficulty' and gd.code=jd.course_difficulty) 难度,
                jd.subject_id 教材所属学科ID,
                substring(sub.subject_name,3,2) 教材所属学科名称,
                (case when jd.grade_id in (1,2,3,4,5) then '小学'
                when jd.grade_id in (7,8,9,6) then '初中'
                else '高中'end) 教材所属年级
            from hwl_quiz_error hqe
            LEFT JOIN 
            view_hwl_quiz_knowledge hqk
            on hqe.queid = hqk.hwlquizid
            LEFT JOIN view_teacher vt
            on hqe.teacherid = vt.teacher_id
            LEFT JOIN view_user_info vui
            on hqe.teacherid = vui.user_id
            left join jx_handouts jh
            on hqe.paperid = jh.paper_id
            left join  jx_directory jd ON jd.id = jh.directory_id
            left join  subject sub on sub.subject_id = jd.subject_id
            left join  view_user_info vui1 on vui1.user_id=jh.principal
            where hqe.createtime >='{0} 00:00:00'
            and hqe.createtime <'{1} 00:00:00'
            and vui.name not like'%测试%' and jh.is_del = 0
            '''.format(start_date, end_date)
        df_1 = mysqlinstance_tm_new_hfjy.get_df(sql1).astype(str)

        # 联audit库
        sql2 = '''
                    SELECT 
                    FROM_UNIXTIME(feedTime, '%Y-%m-%d %H:%i:%s') date,
                    submitId as `submitId`,
                    labelId,
                    content
                    from 
                    view_wo_order
                    where 
                    submitType = 1
                    and type = 3
                    and FROM_UNIXTIME(feedTime, '%Y-%m-%d %H:%i:%s')>='{0} 00:00:00'
                    and FROM_UNIXTIME(feedTime, '%Y-%m-%d %H:%i:%s')<'{1} 00:00:00'
                    and labelId is not null and labelId <> ''
            '''.format(start_date, end_date)

        df1 = mysqlinstance_hrm_audit.get_df(sql2).astype(str)
        # print(df1.columns)
        # 联hfjy库
        sql3 = '''
                select 
                vt.teacher_id,vt.teacher_no,vi.name,group_concat(s.subject_name) subject
                from 
                view_teacher vt
                inner join view_user_info vi 
                on vt.teacher_id = vi.user_id
                INNER JOIN teacher_subject_rel tsr
                on vt.teacher_id = tsr.teacher_id
                INNER JOIN subject s
                on tsr.subject_id = s.subject_id
                where vi.name not like '%测试%'
                and vi.name not like '%授课支持%'
                group by 
                vt.teacher_id,vi.name
            '''

        df2 = mysqlinstance_tm_new_hfjy.get_df(sql3).astype(str)
        # 强哥改进版(list)
        df_merge = pd.merge(df1, df2, how='inner',left_on='submitId', right_on='teacher_id')
        # 使用replace函数进行选项与中文的替换
        for i, j in df_merge.iterrows():
            d = {'12': '课堂语音', '13': '作业', '14': '智学派', '15': '海贝',
                 '16': '微课', '17': '其它', '18': '课件', '19': '备课'}
            for k, v in d.items():
                df_merge.loc[i, 'labelId'] = df_merge.loc[i,
                                                          'labelId'].replace(k, v)
        col_chose = ['date', 'submitId', 'teacher_no',
                     'name', 'labelId', 'content']
        df_2 = df_merge.loc[:, col_chose].rename(
            columns={'date': '反馈时间', 'name': '教师姓名', 'submitId': '教师ID', 'sub': '客观选项', 'content': '主观内容',
                     'labelId': '标签类别', 'teacher_no': '教师编号'})
        return df_1,df_2


# In[11]:


df1_1,df_2 = get_data('2019-06-01','2019-07-19')


# In[7]:


df1_1.head()


# In[9]:


df_2.head()


# In[12]:


df1_1.to_excel(r'e:\Desktop\严选题目纠错数据.xlsx')
df_2.to_excel(r'e:\Desktop\授课端问题反馈数据.xlsx')

文档更新时间: 2019-08-19 11:54   作者:admin