31
edits
(text change) |
(mayor changes) |
||
Line 1: | Line 1: | ||
== Idiomaggio - a language detection bot == | |||
[[File:header.png]] | |||
"The limits of my language are the limits of my world." (Ludwig Wittgenstein) | |||
Idiomaggio is a Twitter bot that detects the language of tweets and responds automatically in the right tongue. In a multilingual world, many different languages can surround you. With the help of Idiomaggio you can figure out which language is being spoken. | |||
The Twitter bot works with the NLTK library, mainly with its built-in ''stop words'' corpus. Linking words, conjunctions, articles and pronouns are words that make a language speakable. However, they don't create the meaning of a text. Idiomaggio picks the stop words out of a tweet and detects which language they belong to. In a second step, the bot replies to the tweet in the detected language. | |||
'''[https://twitter.com/idiomaggio]''' | |||
Idiomaggio understands the European languages Swedish, Danish, Hungarian, Finnish, Portuguese, German, Dutch, French, Spanish, Norwegian, English, Russian, Turkish and Italian. | |||
[[File:English.png]] | |||
[[File:Spanish.png]] | |||
[[File:French.png]] | |||
[[File:Italian.png]] | |||
'''The Code''' | |||
<source lang="python"> | |||
#!/usr/bin/env python2 | |||
# -*- coding: utf-8 -*- # | |||
from twitterbot import TwitterBot | |||
import keys | |||
import nltk | |||
from nltk import wordpunct_tokenize | |||
from nltk.corpus import stopwords | |||
class Idiomaggio(TwitterBot):
    """Twitter bot that guesses the language of a mention and greets back in it.

    The guess is made by counting, for every language in NLTK's stop-word
    corpus, how many distinct stop words of that language occur in the tweet.
    The language with the most hits wins.
    """

    # Greeting sent back for each language the bot can answer in, keyed by the
    # NLTK stop-word corpus file id. Languages missing here get no reply.
    REPLIES = {
        "danish": u'Hej! Taler du dansk?',
        "dutch": u'Hi! Groeten uit Holland.',
        "english": u'Hey! I speak some English.',
        "finnish": u'Hei! Terveisiä Suomi.',
        "french": u'Salut! Parlez-vous français?',
        "german": u'Hey! Sprichst du deutsch?',
        "hungarian": u'Hello! Beszélsz magyarul?',
        "italian": u'Ciao! Saluti da Italia.',
        "norwegian": u'Hei! Jeg snakker norsk.',
        "portuguese": u'Olá! Você fala português?',
        "russian": u'Привет! Привет из России.',
        "spanish": u'¡Hola! Saludos desde España.',
        "swedish": u'Hej! Talar du svenska?',
        "turkish": u'Merhaba! Türkçe biliyor musun?',
    }

    def bot_init(self):
        """
        Fill in the bot configuration.

        The login details are read from the local ``keys`` module; put your
        own consumer key and access token there to bring the bot alive.
        """

        ############################
        # REQUIRED: LOGIN DETAILS! #
        ############################
        self.config['api_key'] = keys.consumer_key
        self.config['api_secret'] = keys.consumer_secret
        self.config['access_key'] = keys.access_token
        self.config['access_secret'] = keys.access_token_secret

        ######################################
        # SEMI-OPTIONAL: OTHER CONFIG STUFF! #
        ######################################

        # how often to tweet, in seconds
        # (10 seconds here; the template's "default: 30 minutes" note did not
        # describe this value)
        self.config['tweet_interval'] = 1 * 10

        # use this to define a (min, max) random range of how often to tweet
        # e.g., self.config['tweet_interval_range'] = (5*60, 10*60) # tweets every 5-10 minutes
        self.config['tweet_interval_range'] = None

        # only reply to tweets that specifically mention the bot
        self.config['reply_direct_mention_only'] = True

        # only include bot followers (and original tweeter) in @-replies
        self.config['reply_followers_only'] = True

        # fav any tweets that mention this bot?
        self.config['autofav_mentions'] = False

        # fav any tweets containing these keywords?
        self.config['autofav_keywords'] = []

        # follow back all followers?
        self.config['autofollow'] = False

        ###########################################
        # CUSTOM: your bot's own state variables! #
        ###########################################

    def on_scheduled_tweet(self):
        """Do nothing: this bot posts no scheduled tweets, it only replies."""
        pass

    def _stopword_hits(self, text):
        """Return ``{language: count}`` of distinct stop words found in *text*.

        *text* is tokenized with ``wordpunct_tokenize`` and lower-cased; the
        count for a language is the size of the intersection between the
        tweet's tokens and that language's stop-word list.
        """
        # Build the token set once; it does not depend on the language.
        tokens = {token.lower() for token in wordpunct_tokenize(text)}
        return {
            language: len(tokens.intersection(stopwords.words(language)))
            for language in stopwords.fileids()
        }

    def on_mention(self, tweet, prefix):
        """Detect the language of *tweet* and reply with a matching greeting.

        ``tweet.text`` is scored against every stop-word list. If the best
        language has a greeting in ``REPLIES``, the greeting is posted as a
        reply, prepended with *prefix* (presumably the @-mention prefix built
        by the framework -- confirm against TwitterBot).

        No reply is sent when not a single stop word matched: in that case
        every language ties at zero and any "winner" would be arbitrary.
        """
        text = tweet.text
        print(text)

        hits = self._stopword_hits(text)
        # Guard both an empty corpus and the all-zero tie described above.
        most_probable = max(hits, key=hits.get) if hits else None
        if most_probable is None or hits[most_probable] == 0:
            print("Could not detect the language of your text")
            return

        response = self.REPLIES.get(most_probable)
        if response is not None:
            prefixed = prefix + ' ' + response
            self.post_tweet(prefixed, reply_to=tweet)

        print("The language of your text is %s" % most_probable)

    def on_timeline(self, tweet, prefix):
        """Do nothing: tweets that do not mention the bot are ignored."""
        pass
if __name__ == "__main__":
    # Script entry point: create the bot and hand control to its run method.
    Idiomaggio().run()
</source> | |||
[[File:myrobot.png|100px100px|thumb|left]] | [[File:myrobot.png|100px100px|thumb|left]] |
edits