Files
codex_jxs_code/gmail_reader.py

209 lines
6.9 KiB
Python
Raw Normal View History

2026-03-02 10:55:39 +08:00
"""
Gmail API 邮件读取工具
使用前准备
1. 访问 https://console.cloud.google.com/ 创建项目
2. 启用 Gmail API
3. 创建 OAuth 2.0 凭据桌面应用类型下载 credentials.json 放到本目录
4. 首次运行会弹出浏览器授权授权后自动生成 token.json
"""
import os
import base64
import json
from datetime import datetime
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from email.utils import parsedate_to_datetime
# Read-only Gmail scope
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
# Credential file paths (same directory as this script)
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
CREDENTIALS_FILE = os.path.join(BASE_DIR, 'credentials.json')
TOKEN_FILE = os.path.join(BASE_DIR, 'token.json')
def get_service():
    """Build an authorized Gmail API service client.

    Cached credentials are loaded from ``TOKEN_FILE`` when it exists. If they
    are missing or invalid they are refreshed (when a refresh token is
    available) or re-obtained through the interactive installed-app flow
    driven by ``CREDENTIALS_FILE``. Any refreshed or newly obtained
    credentials are written back to ``TOKEN_FILE``.

    Returns:
        The object returned by ``build('gmail', 'v1', credentials=...)``, or
        ``None`` when interactive authorization is needed but
        ``CREDENTIALS_FILE`` does not exist.
    """
    # Local import: only needed to recognise a failed refresh below.
    from google.auth.exceptions import RefreshError

    creds = None
    # Try to load a previously saved token.
    if os.path.exists(TOKEN_FILE):
        try:
            creds = Credentials.from_authorized_user_file(TOKEN_FILE, SCOPES)
        except ValueError:
            # Corrupt or incomplete token file: treat it as absent and
            # re-authorize instead of crashing.
            creds = None

    # Token missing, invalid or expired: refresh or re-authorize.
    if not creds or not creds.valid:
        refreshed = False
        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
                refreshed = True
            except RefreshError:
                # Refresh token revoked or expired: fall back to the
                # interactive flow rather than aborting.
                refreshed = False

        if not refreshed:
            if not os.path.exists(CREDENTIALS_FILE):
                print(f"❌ 找不到 {CREDENTIALS_FILE}")
                print("请从 Google Cloud Console 下载 OAuth 凭据文件,命名为 credentials.json 放到本目录")
                return None
            flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_FILE, SCOPES)
            creds = flow.run_local_server(port=0)

        # Persist the token for the next run.
        with open(TOKEN_FILE, 'w', encoding='utf-8') as f:
            f.write(creds.to_json())

    return build('gmail', 'v1', credentials=creds)
def _b64url_to_text(data):
    """Decode a base64url string to text, tolerating missing ``=`` padding."""
    padded = data + '=' * (-len(data) % 4)
    return base64.urlsafe_b64decode(padded).decode('utf-8', errors='ignore')


def decode_body(payload):
    """Recursively extract the message body text, preferring plain text.

    Lookup order:
      1. ``payload['body']['data']`` if it is non-empty;
      2. the first direct child part of type ``text/plain`` with data;
      3. the first direct child part of type ``text/html`` with data;
      4. the first non-empty result of recursing into each child part.

    Args:
        payload: A message payload / part dict with optional ``body``,
            ``parts`` and ``mimeType`` keys.

    Returns:
        The decoded text (UTF-8, undecodable bytes dropped), or ``None`` when
        no body data is found.
    """
    # Body data attached directly to this payload.
    body = payload.get('body') or {}
    if body.get('data'):
        return _b64url_to_text(body['data'])

    # Multipart message: look through the child parts.
    parts = payload.get('parts') or []

    # Prefer text/plain, then fall back to text/html.
    for wanted_type in ('text/plain', 'text/html'):
        for part in parts:
            if part.get('mimeType') == wanted_type:
                # A part may lack a 'body' entry; treat that as "no data".
                data = (part.get('body') or {}).get('data', '')
                if data:
                    return _b64url_to_text(data)

    # Nothing at this level: recurse into nested parts.
    for part in parts:
        result = decode_body(part)
        if result:
            return result
    return None
def get_header(headers, name):
    """Return the value of the first header matching *name*, ignoring case.

    Args:
        headers: Iterable of dicts, each with ``'name'`` and ``'value'`` keys.
        name: Header name to look up.

    Returns:
        The matching header's value, or ``''`` when no header matches.
    """
    matches = (
        entry['value']
        for entry in headers
        if entry['name'].lower() == name.lower()
    )
    return next(matches, '')
def list_emails(service, query='', label_ids=None, max_results=10):
    """List message references, following pagination up to *max_results*.

    A single ``users.messages.list`` call returns at most one page, so this
    keeps requesting pages via ``nextPageToken`` until *max_results* entries
    have been collected or the result set is exhausted.

    Args:
        service: Object exposing ``users().messages().list(**params).execute()``.
        query: Gmail search expression, e.g. ``'from:xxx@gmail.com'``,
            ``'subject:报告'`` or ``'is:unread'``. Omitted when empty.
        label_ids: Label filter such as ``['INBOX']`` or ``['UNREAD']``.
            Omitted when empty or ``None``.
        max_results: Maximum number of entries to return.

    Returns:
        A list of the ``messages`` entries from the API responses
        (empty when nothing matches).
    """
    page_cap = 500  # largest page size accepted by users.messages.list

    params = {'userId': 'me'}
    if query:
        params['q'] = query
    if label_ids:
        params['labelIds'] = label_ids

    messages = []
    while True:
        # Never ask for more than is still needed, nor more than one page holds.
        params['maxResults'] = min(max_results - len(messages), page_cap)
        response = service.users().messages().list(**params).execute()
        messages.extend(response.get('messages', []))

        next_token = response.get('nextPageToken')
        if not next_token or len(messages) >= max_results:
            return messages
        params['pageToken'] = next_token
def read_email(service, msg_id):
    """Fetch one message in ``full`` format and flatten it into a dict.

    Args:
        service: Object exposing ``users().messages().get(...).execute()``.
        msg_id: ID of the message to fetch.

    Returns:
        A dict with keys ``id``, ``subject``, ``from``, ``to``, ``date``,
        ``body``, ``labels`` and ``snippet``. ``date`` is reformatted as
        ``YYYY-MM-DD HH:MM:SS`` when the Date header parses, otherwise the raw
        header value is kept.
    """
    request = service.users().messages().get(userId='me', id=msg_id, format='full')
    message = request.execute()
    header_list = message['payload']['headers']

    summary = {'id': msg_id}
    summary['subject'] = get_header(header_list, 'Subject') or '(无主题)'
    summary['from'] = get_header(header_list, 'From')
    summary['to'] = get_header(header_list, 'To')
    when = get_header(header_list, 'Date')
    text = decode_body(message['payload']) or '(无正文)'

    # Normalise the date; on any parsing problem keep the raw header text.
    try:
        when = parsedate_to_datetime(when).strftime('%Y-%m-%d %H:%M:%S')
    except Exception:
        pass

    summary['date'] = when
    summary['body'] = text
    summary['labels'] = message.get('labelIds', [])
    summary['snippet'] = message.get('snippet', '')
    return summary
def get_attachments(service, msg_id, save_dir=None):
    """Download every attachment of a message into *save_dir*.

    Walks the message's part tree recursively; each part that has both a
    filename and a ``body.attachmentId`` is fetched and written to disk.

    Args:
        service: Object exposing ``users().messages().get(...)`` and
            ``users().messages().attachments().get(...)``.
        msg_id: ID of the message whose attachments are downloaded.
        save_dir: Target directory; defaults to ``<BASE_DIR>/attachments``.
            It is created only when at least one attachment is saved.

    Returns:
        A list of dicts with keys ``filename`` (the name used on disk),
        ``path`` and ``size`` (bytes), one per saved attachment.
    """
    msg = service.users().messages().get(userId='me', id=msg_id, format='full').execute()
    attachments = []
    if save_dir is None:
        save_dir = os.path.join(BASE_DIR, 'attachments')

    def _safe_name(raw_name):
        # The filename comes from the email and is untrusted: drop any
        # directory components so it cannot escape save_dir
        # (e.g. '../../x' or an absolute path).
        name = os.path.basename(raw_name)
        if name in ('', '.', '..'):
            name = f'attachment_{len(attachments) + 1}'
        return name

    def _find_attachments(payload):
        for part in payload.get('parts') or []:
            filename = part.get('filename', '')
            # A part may lack a 'body' entry; treat that as "no attachment".
            attachment_id = (part.get('body') or {}).get('attachmentId')
            if filename and attachment_id:
                att = service.users().messages().attachments().get(
                    userId='me', messageId=msg_id, id=attachment_id
                ).execute()
                encoded = att['data']
                # Restore '=' padding in case it was stripped from the base64url data.
                data = base64.urlsafe_b64decode(encoded + '=' * (-len(encoded) % 4))
                os.makedirs(save_dir, exist_ok=True)
                filename = _safe_name(filename)
                filepath = os.path.join(save_dir, filename)
                with open(filepath, 'wb') as f:
                    f.write(data)
                attachments.append({'filename': filename, 'path': filepath, 'size': len(data)})
                print(f" 📎 已保存附件: {filename} ({len(data)} bytes)")
            # Recurse into nested parts.
            _find_attachments(part)

    _find_attachments(msg['payload'])
    return attachments
# ============ Usage example ============
if __name__ == '__main__':
    service = get_service()
    if not service:
        # `exit()` is a helper injected by the `site` module for interactive
        # sessions and may be absent (e.g. `python -S`); raise SystemExit
        # directly to terminate with status 1.
        raise SystemExit(1)

    print("=" * 60)
    print("📬 Gmail 邮件读取")
    print("=" * 60)

    # --- Example 1: read the 5 most recent inbox messages ---
    print("\n📥 收件箱最近 5 封邮件:\n")
    messages = list_emails(service, label_ids=['INBOX'], max_results=5)
    for m in messages:
        email_data = read_email(service, m['id'])
        print(f"📧 主题: {email_data['subject']}")
        print(f" 发件人: {email_data['from']}")
        print(f" 日期: {email_data['date']}")
        print(f" 摘要: {email_data['snippet'][:80]}...")
        print()

    # --- Example 2: search for specific messages (uncomment to use) ---
    # messages = list_emails(service, query='subject:报告 is:unread', max_results=5)

    # --- Example 3: read the full body ---
    # if messages:
    #     email_data = read_email(service, messages[0]['id'])
    #     print(f"\n完整正文:\n{email_data['body']}")

    # --- Example 4: download attachments ---
    # if messages:
    #     get_attachments(service, messages[0]['id'])