129 lines
3.8 KiB
Python
129 lines
3.8 KiB
Python
import sys
|
|
import os
|
|
from os.path import basename
|
|
import spotipy
|
|
import spotipy.oauth2 as oauth2
|
|
import re
|
|
from titlecase import titlecase
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
def improve_song_names(songs):
    """Turn raw MP3 file names into cleaner search queries.

    For each name this strips surrounding whitespace, drops a leading
    track-number prefix (any run of digits, dots, dashes and spaces),
    removes a trailing ``.mp3`` extension, turns bracket/punctuation
    characters into spaces, deletes common noise words such as
    "official" or "lyrics", and collapses runs of whitespace.

    Args:
        songs: Iterable of file-name strings.

    Returns:
        A list of cleaned names, in the same order as ``songs``.
    """
    char_filters = "()[]{}-:_/=+\"'"
    word_filters = (
        'lyrics', 'lyric', 'by', 'video', 'official', 'hd', 'dirty', 'with',
        'feat', 'original', 'mix', 'www', 'com', 'mp3', 'audio', 'remixed',
        'remix', 'full', 'version', 'music', 'hq', 'uploaded', 'explicit',
    )

    # One translation table replaces every filtered character with a space.
    to_space = str.maketrans(char_filters, ' ' * len(char_filters))

    # \b anchors make the filter match whole words only; without them "by"
    # would also be cut out of "Baby" and "com" out of "Welcome".
    noise_words = re.compile(
        r'\b(?:' + '|'.join(re.escape(word) for word in word_filters) + r')\b',
        flags=re.IGNORECASE)

    improved_names = []
    for song in songs:
        song = song.strip().lstrip("0123456789.- ")
        # Only drop the extension when it is really there, instead of
        # blindly chopping the last four characters.
        if song.lower().endswith('.mp3'):
            song = song[:-4]
        song = song.translate(to_space)
        song = noise_words.sub('', song)
        improved_names.append(' '.join(song.split()))

    return improved_names
|
|
|
|
|
|
def get_song_name(title, artist):
    """Return the search string "<title> - <artist>" for a song."""
    return ' - '.join((title, artist))
|
|
|
|
|
|
def get_lyrics_genius(song_name):
    """Look up a song on Genius and scrape the lyrics from its web page.

    The function searches the Genius API for ``song_name``, takes the first
    hit, asks the API for that song's web path, downloads the page and
    extracts the text of its ``<div class="lyrics">`` element.

    Args:
        song_name: Free-text search query, e.g. "Title - Artist".

    Returns:
        The lyrics text (line breaks preserved), or ``None`` when the search
        has no hits, an API reply lacks the expected fields, or the page has
        no ``div.lyrics`` element.
    """
    # NOTE(review): an access token is embedded in source control; it should
    # be rotated. The GENIUS_KEY environment variable overrides it.
    genius_key = os.environ.get(
        "GENIUS_KEY",
        "TxvC3bwsONZ1lh3j8Idq-VFBXKk5SlwYoAyGwkWFvbqkJB8vwdMGV7b5Q4sJSTd2")
    base_url = "https://api.genius.com"
    headers = {'Authorization': 'Bearer %s' % genius_key}

    # The query belongs in the URL (params=); data= would send it as the
    # body of the GET request instead.
    search_reply = requests.get(
        base_url + "/search", params={'q': song_name}, headers=headers).json()

    try:
        api_path = search_reply['response']['hits'][0]['result']['api_path']
    except (KeyError, IndexError):
        # IndexError: the search succeeded but returned an empty hit list.
        print("Could not find lyrics")
        return None

    song_reply = requests.get(base_url + api_path, headers=headers).json()
    try:
        song_path = song_reply['response']['song']['path']
    except KeyError:
        print("Could not find lyrics")
        return None

    page = requests.get("http://genius.com" + song_path)
    html = BeautifulSoup(page.text, "html.parser")

    # remove script tags that they put in the middle of the lyrics
    for script in html('script'):
        script.extract()

    lyrics_div = html.find("div", class_="lyrics")
    if lyrics_div is None:
        print("Could not find lyrics")
        return None

    # The earlier code called lyrics.replace('\n', ' ') but discarded the
    # result, so callers have always received the text with its line breaks;
    # that output is kept and the dead statement is dropped.
    return lyrics_div.get_text()
|
|
|
|
|
|
def get_metadata_spotify(spotify, song_name):
    """Collect tag metadata for a song from Spotify, plus lyrics from Genius.

    Uses the first track returned by ``spotify.search`` for ``song_name``.
    The genre is taken from the album's genre list, falling back to the
    first artist's genre list; when both are empty no 'genre' key is set.

    Args:
        spotify: Client object providing ``search``, ``album`` and ``artist``.
        song_name: Search query for the track.

    Returns:
        A dict with the keys 'title', 'artist', 'album', 'album_artist',
        'release-date', optionally 'genre', 'track_num', 'disc_num',
        'albumart' and 'lyrics'.
    """
    track = spotify.search(song_name, limit=1)['tracks']['items'][0]

    metadata = {}
    metadata['title'] = track['name']
    metadata['artist'] = track['artists'][0]['name']

    album = track['album']
    metadata['album'] = album['name']
    metadata['album_artist'] = album['artists'][0]['name']

    album_details = spotify.album(album['id'])
    metadata['release-date'] = album_details['release_date']

    # Genre sources in priority order; the artist lookup is only performed
    # when the album lists no genres.
    genre_sources = (
        lambda: album_details,
        lambda: spotify.artist(track['artists'][0]['id']),
    )
    for fetch_source in genre_sources:
        try:
            metadata['genre'] = titlecase(fetch_source()['genres'][0])
        except IndexError:
            continue
        break

    metadata['track_num'] = track['track_number']
    metadata['disc_num'] = track['disc_number']
    metadata['albumart'] = album['images'][0]['url']

    query = get_song_name(metadata['title'], metadata['artist'])
    metadata['lyrics'] = get_lyrics_genius(query)

    return metadata
|
|
|
|
|
|
def list_files(directory='.'):
    """Return the names of the MP3 files in a directory.

    Args:
        directory: Directory to scan; defaults to the current working
            directory.

    Returns:
        A list of file names (not full paths) whose extension is ``.mp3``,
        matched case-insensitively, in the order ``os.listdir`` yields them.
    """
    return [name for name in os.listdir(directory)
            if name.lower().endswith('.mp3')]
|
|
|
|
|
|
def main(index=19):
    """Fetch Spotify/Genius metadata for one MP3 in the current directory.

    Args:
        index: Position, within the ``list_files()`` result, of the song to
            look up. Defaults to 19, the value that used to be hard-coded.

    Returns:
        The metadata dict produced by ``get_metadata_spotify``, or ``None``
        when ``index`` is out of range for the files that were found.
    """
    # NOTE(review): client credentials are embedded in source control and
    # should be rotated. The environment variables below override them.
    auth = oauth2.SpotifyClientCredentials(
        client_id=os.environ.get(
            'SPOTIPY_CLIENT_ID', '622a0e16a4914e3eadc2a37b4a134f1e'),
        client_secret=os.environ.get(
            'SPOTIPY_CLIENT_SECRET', '6fe008a8b7754954a58a9849fa3172df'))

    # Pass the credentials manager rather than a token fetched once: the
    # return type of get_access_token() differs between spotipy releases,
    # and the manager lets the client obtain and refresh tokens itself.
    spotify = spotipy.Spotify(client_credentials_manager=auth)

    files = list_files()
    improved_names = improve_song_names(files)

    try:
        song_name = improved_names[index]
    except IndexError:
        print("No song at index %d (%d mp3 files found)"
              % (index, len(improved_names)))
        return None

    return get_metadata_spotify(spotify, song_name)
|
|
|
|
|
|
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|