작은 메모장

4. 파일 입출력, 메일 자동화, 웹 서비스 크롤링 본문

실더스 루키즈 교육

4. 파일 입출력, 메일 자동화, 웹 서비스 크롤링

으앙내눈 2023. 11. 24. 17:21

저번 파일 입출력에서 계속

 

<사용한 라이브러리>

pip install python-dotenv

 

import os
import time

# Directory that is polled for newly created entries.
DIR_WATCH = "static"

# Snapshot taken at start-up; only names that appear later count as new.
previous_files = set(os.listdir(DIR_WATCH))

# WatchDogs: poll the directory once per second, forever.
while True:
    time.sleep(1)
    print("Watching..")

    current_files = set(os.listdir(DIR_WATCH))
    new_files = current_files - previous_files

    # Update
    for file in new_files:
        print(f"+ Detected New File : {file}")

        # A new entry may be a directory, may vanish before it is opened, or
        # may not be valid UTF-8.  None of these should stop the watcher, so
        # the entry is reported and skipped.
        try:
            with open(f"{DIR_WATCH}/{file}", "r", encoding="utf-8") as f:
                lines = f.readlines()
        except (OSError, UnicodeDecodeError) as err:
            print(f"- Skipped {file} : {err}")
            continue

        for line in lines:
            # Check: lines starting with "#" or "//" are treated as important.
            if line.startswith(("#", "//")):
                print(f"!!Somthing Important Info in {file} content!!")
                print(line)

    # Names seen in this pass are no longer new in the next one.
    previous_files = current_files

파일 입출력 고급 연습

 

메일 자동화 서비스 구현해보기

IMAP/SMTP 설정 해야함

2단계 인증은 잠시 해제

 

import smtplib
from email.header import Header
from email.mime.text import MIMEText

# Insert Your ID Here
# NOTE: SECRET_IP holds the login ID (not an IP address); the name is kept
# unchanged from the original script.
SECRET_IP = "YOUR ID"
SECRET_PASS = "YOUR PASSWORD"

myemail = "YOUR EMAIL"
youremail = "TARGET EMAIL"

# Build the whole message before opening the connection.  MIMEText and Header
# accept str directly and apply the UTF-8 charset themselves.
subject = "Subject Test"
message = "Message Test"
msg = MIMEText(message, _subtype="plain", _charset="utf-8")
msg["Subject"] = Header(subject, "utf-8")
msg["From"] = myemail
msg["To"] = youremail

# The context manager issues QUIT and closes the socket on exit, including
# when login or delivery raises.
with smtplib.SMTP("smtp.naver.com", 587) as smtp:
    smtp.ehlo()
    smtp.starttls()  # upgrade the connection to TLS before authenticating
    smtp.login(SECRET_IP, SECRET_PASS)
    smtp.sendmail(myemail, youremail, msg.as_string())

SMTP 통신 코드의 기본적인 구성은 위와 같음

저걸 조금만 응용하면

 

import os
import time
import smtplib
from email.header import Header
from email.mime.text import MIMEText

# Directory whose listing is polled for newly created entries.
DIR_WATCH = "static"


def mail_sender(content):
    """Send *content* as a plain-text e-mail through smtp.naver.com.

    The account, sender and recipient are the placeholder literals below and
    must be filled in before use.

    Args:
        content: Text appended to the fixed heading line of the mail body.

    Raises:
        Errors raised by ``smtplib`` while connecting, logging in or
        delivering are not caught here and propagate to the caller.
    """
    # Insert Your ID Here
    secret_id = "YOUR ID"
    secret_pass = "YOUR PASSWORD"

    myemail = "YOUR EMAIL"
    youremail = "TARGET EMAIL"

    # Build the message first so that no connection is opened unless there is
    # something ready to send.  MIMEText/Header take str and encode it as
    # UTF-8 themselves.
    subject = "Detecting new Inportant Data"
    message = f"Automatic catch Information : \n{content}"
    msg = MIMEText(message, _subtype="plain", _charset="utf-8")
    msg["Subject"] = Header(subject, "utf-8")
    msg["From"] = myemail
    msg["To"] = youremail

    # The context manager issues QUIT and closes the socket on exit, including
    # when login or delivery raises.
    with smtplib.SMTP("smtp.naver.com", 587) as smtp:
        smtp.ehlo()
        smtp.starttls()  # upgrade the connection to TLS before authenticating
        smtp.login(secret_id, secret_pass)
        smtp.sendmail(myemail, youremail, msg.as_string())


#
# Snapshot taken at start-up; only names that appear later count as new.
previous_files = set(os.listdir(DIR_WATCH))

# WatchDogs: poll the directory once per second, forever.
while True:
    time.sleep(1)
    print("Watching..")

    current_files = set(os.listdir(DIR_WATCH))
    new_files = current_files - previous_files

    # Update
    for file in new_files:
        print(f"+ Detected New File : {file}")

        # A new entry may be a directory, may vanish before it is opened, or
        # may not be valid UTF-8.  None of these should stop the watcher, so
        # the entry is reported and skipped.
        try:
            with open(f"{DIR_WATCH}/{file}", "r", encoding="utf-8") as f:
                lines = f.readlines()
        except (OSError, UnicodeDecodeError) as err:
            print(f"- Skipped {file} : {err}")
            continue

        # Check: collect every line starting with "#" or "//".  A matching
        # line is never empty, so a non-empty result means at least one hit.
        targetcontent = "".join(
            line for line in lines if line.startswith(("#", "//"))
        )

        if targetcontent:
            print("Mail Sent")
            mail_sender(targetcontent)

    # Names seen in this pass are no longer new in the next one.
    previous_files = current_files

이런 코드를 만들 수 있음

결과도 잘 되는 것을 확인 가능

 

import os
import time
import smtplib
from dotenv import load_dotenv
from email.header import Header
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication

# Directory whose listing is polled for newly created entries.
DIR_WATCH = "static"


def mail_sender(description):
    """E-mail *description* with ``detected_files.txt`` attached.

    The mail is sent through smtp.naver.com.  Sender and recipient are the
    placeholder literals below and must be filled in before use.

    Args:
        description: Text appended to the fixed heading line of the mail body.

    Raises:
        OSError: If ``detected_files.txt`` cannot be opened for reading.
        Errors raised by ``smtplib`` while connecting, logging in or
        delivering are not caught here and propagate to the caller.
    """
    # Load variables from a .env file (if one exists) into the environment.
    load_dotenv()
    # Insert Your ID Here
    # Credentials are taken from the environment when set; the placeholder
    # literals stay as the fallback, so nothing changes without a .env file.
    # NOTE(review): the variable names SECRET_ID / SECRET_PASS mirror the
    # original local names -- adjust them to match the keys in your .env.
    secret_id = os.getenv("SECRET_ID", "YOUR ID")
    secret_pass = os.getenv("SECRET_PASS", "YOUR PASSWORD")

    myemail = "YOUR EMAIL"
    youremail = "TARGET EMAIL"

    # The complete message, attachment included, is assembled before any
    # connection is opened, so a missing attachment cannot leave a logged-in
    # SMTP session behind.
    msg = MIMEMultipart()
    # Mail Header
    subject = "Detecting new Important Data"
    msg["Subject"] = Header(subject, "utf-8")
    msg["From"] = myemail
    msg["To"] = youremail

    message = f"Automatic catch Information : \n{description}"
    msg.attach(MIMEText(message))

    # Mail Content: attach the accumulated detection log as a file.
    etc_file_path = r"detected_files.txt"
    with open(etc_file_path, "rb") as f:
        etc_content = MIMEApplication(f.read())
    etc_content.add_header(
        "Content-Disposition", "attachment", filename=etc_file_path
    )
    msg.attach(etc_content)

    # The context manager issues QUIT and closes the socket on exit, including
    # when login or delivery raises.
    with smtplib.SMTP("smtp.naver.com", 587) as smtp:
        smtp.ehlo()
        smtp.starttls()  # upgrade the connection to TLS before authenticating
        smtp.login(secret_id, secret_pass)
        smtp.sendmail(myemail, youremail, msg.as_string())


# Log file that accumulates every detection.
detected_files = "detected_files.txt"
# Snapshot taken at start-up; only names that appear later count as new.
previous_files = set(os.listdir(DIR_WATCH))

# WatchDogs: poll the directory once per second, forever.
while True:
    time.sleep(1)
    print("Watching..")

    current_files = set(os.listdir(DIR_WATCH))
    new_files = current_files - previous_files

    # Update
    for file in new_files:
        print(f"+ Detected New File : {file}")

        # A new entry may be a directory, may vanish before it is opened, or
        # may not be valid UTF-8.  None of these should stop the watcher, so
        # the entry is reported and skipped.
        try:
            with open(f"{DIR_WATCH}/{file}", "r", encoding="utf-8") as f:
                lines = f.readlines()
        except (OSError, UnicodeDecodeError) as err:
            print(f"- Skipped {file} : {err}")
            continue

        # Check: collect every line starting with "#" or "//".  A matching
        # line is never empty, so a non-empty result means at least one hit.
        targetcontent = "".join(
            line for line in lines if line.startswith(("#", "//"))
        )

        if targetcontent:
            print("> Changes Saved")
            # Append to the log before mailing so the attachment built by
            # mail_sender already contains this detection.
            with open(detected_files, "a", encoding="utf-8") as wf:
                wf.write(
                    f"Detecting new Important Data\n{file} content : \n{targetcontent}\n"
                )
            print("> Mail Sent")
            mail_sender(targetcontent)

    # Names seen in this pass are no longer new in the next one.
    previous_files = current_files

궁극의 이메일 자동화 프로그램

 

크롤링이란?

크롤링(Crawling): 원하는 사이트의 정보나 링크 정보들을 긁어 모으는 작업
웹 서비스 크롤링 - HTML, Javascript, css.. (클라이언트 사이드 스크립트)
API 를 통해 제공해주는 것은 사라지는 중

고객사의 웹 서비스 정보 내에 중요한 정보들이 포함 → 웹 취약점 스캐너 항목에 포함
보안 이슈, 보안 동향 등 수집 → 데이터를 정제 → 고객 제공

 

<사용한 라이브러리>

pip install requests

pip install beautifulsoup4

pip install lxml

 

 

import requests
from bs4 import BeautifulSoup as bs

# Article page whose anchors are listed.
url = "https://www.mbn.co.kr/news/society/4914296"

headers = {"User-Agent": "Mozilla/5.0", "Content-Type": "text/html; charset=utf-8"}

# Without a timeout requests waits indefinitely on an unresponsive server.
# raise_for_status() stops on an HTTP error instead of parsing an error page.
req = requests.get(url, headers=headers, timeout=10)
req.raise_for_status()

soup = bs(req.text, "lxml")

# href=True limits the match to <a> tags that actually carry an href.
for link in soup.find_all("a", href=True):
    print(f"{link.text}, link: {link['href']}")

크롤링 연습(find 사용)

 

import requests
from bs4 import BeautifulSoup as bs

# Front page whose headline entries are printed.
url = "http://www.boannews.com/"

headers = {"User-Agent": "Mozilla/5.0", "Content-Type": "text/html; charset=utf-8"}

# Without a timeout requests waits indefinitely on an unresponsive server.
# raise_for_status() stops on an HTTP error instead of parsing an error page.
req = requests.get(url, headers=headers, timeout=10)
req.raise_for_status()

soup = bs(req.text, "lxml")

# CSS selector: <p> elements directly under the list items of #headline2.
tags = soup.select("#headline2 > ul > li > p")

for tag in tags:
    print(f"- {tag.string}")

크롤링 연습 (select 사용)

 

import requests
from bs4 import BeautifulSoup as bs

# Base URL of the yearly index; entry links are relative to it.
target_url = "https://www.malware-traffic-analysis.net/2023/"

headers = {"User-Agent": "Mozilla/5.0", "Content-Type": "text/html; charset=utf-8"}

# Without a timeout requests waits indefinitely on an unresponsive server.
# raise_for_status() stops on an HTTP error instead of parsing an error page.
req = requests.get(target_url + "index.html", headers=headers, timeout=10)
req.raise_for_status()
soup = bs(req.text, "lxml")

# Page structure the selector below relies on:
#   Main content       : .content > ul
#   Individual content : [.content > ul] > li > .main_menu
#   title              : [.content > ul > li > .main_menu] > a
#   link               : [.content > ul > li > .main_menu] > a.href

origin_contents = soup.select(".content > ul > li > .main_menu")

# Format every entry first, then append them in one write instead of
# reopening the output file once per entry.
entries = []
for content in origin_contents:
    title = content.string
    link = target_url + content["href"]
    entries.append(f"@ ISSUES : {title}\nlink HERE >> {link}\n\n")

# As before, the output file is only touched when something was found.
if entries:
    with open("crawl_datas.txt", "a", encoding="utf-8") as f:
        f.writelines(entries)

크롤링 실습(이슈 자동 가져오기)

 

엑셀 제작

import openpyxl
from faker import Faker

# Create a workbook and work on the sheet it opens with.
workbook = openpyxl.Workbook()
worksheet = workbook.active

# Row 1 holds the column headings.
for column_index, heading in enumerate(("name", "email", "phone_number"), start=1):
    worksheet.cell(row=1, column=column_index, value=heading)

# Faker instance created with the "ko_KR" locale.
fake = Faker("ko_KR")

# Fill rows 2 through 49 with one generated member per row; the providers are
# called in column order (name, e-mail, phone number).
for row_index in range(2, 50):
    providers = (fake.name, fake.email, fake.phone_number)
    for column_index, provider in enumerate(providers, start=1):
        worksheet.cell(row=row_index, column=column_index, value=provider())

workbook.save("member.xlsx")

엑셀 제작 연습