GMU:Bots 'n' Plots/Christopher Marx: Difference between revisions

GMU:Bots 'n' Plots/Christopher Marx (view source)

Revision as of 20:41, 3 October 2015

6,592 bytes added , 3 October 2015

major changes

Christopher Marx

31

edits

@@ Line 1: / Line 1: @@
-Page of Christopher Marx
+== Idiomaggio - a language detection bot ==
+[[File:header.png]]
+"The limits of my language are the limits of my world." (Ludwig Wittgenstein)
+Idiomaggio is a Twitter bot that detects the language of tweets and responds automatically in the right tongue. In a multilingual world, many different languages may surround you. With the help of Idiomaggio you can figure out which language is being spoken.
+The Twitter bot works with the NLTK library, mainly with its built-in ''stop words'' corpus. Linking words, conjunctions, articles and pronouns are the words that make a language speakable; however, they don't create the meaning of a text. Idiomaggio picks the stop words out of a tweet and detects which language they belong to. In a second step, it replies to the tweet in the detected language.
+'''[https://twitter.com/idiomaggio]'''
+Idiomaggio understands the European languages Swedish, Danish, Hungarian, Finnish, Portuguese, German, Dutch, French, Spanish, Norwegian, English, Russian, Turkish and Italian.
+[[File:English.png]]
+[[File:Spanish.png]]
+[[File:French.png]]
+[[File:Italian.png]]
+'''The Code'''
+<source lang="python">
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*- #
+from twitterbot import TwitterBot
+import keys
+import nltk
+from nltk import wordpunct_tokenize
+from nltk.corpus import stopwords
+class Idiomaggio(TwitterBot):
+    def bot_init(self):
+        """
+        Use your own consumer key to make the bot alive.
+        """
+        ############################
+        # REQUIRED: LOGIN DETAILS! #
+        ############################
+        self.config['api_key'] = keys.consumer_key
+        self.config['api_secret'] = keys.consumer_secret
+        self.config['access_key'] = keys.access_token
+        self.config['access_secret'] = keys.access_token_secret
+        ######################################
+        # SEMI-OPTIONAL: OTHER CONFIG STUFF! #
+        ######################################
+        # how often to tweet, in seconds
+        self.config['tweet_interval'] = 1 * 10     # default: 30 minutes
+        # use this to define a (min, max) random range of how often to tweet
+        # e.g., self.config['tweet_interval_range'] = (5*60, 10*60) # tweets every 5-10 minutes
+        self.config['tweet_interval_range'] = None
+        # only reply to tweets that specifically mention the bot
+        self.config['reply_direct_mention_only'] = True
+        # only include bot followers (and original tweeter) in @-replies
+        self.config['reply_followers_only'] = True
+        # fav any tweets that mention this bot?
+        self.config['autofav_mentions'] = False
+        # fav any tweets containing these keywords?
+        self.config['autofav_keywords'] = []
+        # follow back all followers?
+        self.config['autofollow'] = False
+        ###########################################
+        # CUSTOM: your bot's own state variables! #
+        ###########################################
+    def on_scheduled_tweet(self):
+        pass
+    def on_mention(self, tweet, prefix):
+        text = tweet.text
+        print(text)
+        print(type(text))
+        percentage = {}
+        tokens = wordpunct_tokenize(text)
+        words = []
+        for word in tokens:
+            words.append(word.lower())
+        for language in stopwords.fileids():
+            stopwords_set = set(stopwords.words(language))
+            words_set = set(words)
+            most_common = words_set.intersection(stopwords_set)
+            percentage[language] = len(most_common)
+        most_probable = max(percentage, key=percentage.get)
+        if most_probable == "danish":
+            response = u'Hej! Taler du dansk?'
+            prefixed = prefix + ' ' + response
+            self.post_tweet(prefixed, reply_to=tweet)
+        if most_probable == "dutch":
+            response = u'Hi! Groeten uit Holland.'
+            prefixed = prefix + ' ' + response
+            self.post_tweet(prefixed, reply_to=tweet)
+        if most_probable == "english":
+            response = u'Hey! I speak some English.'
+            prefixed = prefix + ' ' + response
+            self.post_tweet(prefixed, reply_to=tweet)
+        if most_probable == "finnish":
+            response = u'Hei! Terveisiä Suomi.'
+            prefixed = prefix + ' ' + response
+            self.post_tweet(prefixed, reply_to=tweet)
+        if most_probable == "french":
+            response = u'Salut! Parlez-vous français?'
+            prefixed = prefix + ' ' + response
+            self.post_tweet(prefixed, reply_to=tweet)
+        if most_probable == "german":
+            response = u'Hey! Sprichst du deutsch?'
+            prefixed = prefix + ' ' + response
+            self.post_tweet(prefixed, reply_to=tweet)
+        if most_probable == "hungarian":
+            response = u'Hello! Beszélsz magyarul?'
+            prefixed = prefix + ' ' + response
+            self.post_tweet(prefixed, reply_to=tweet)
+        if most_probable == "italian":
+            response = u'Ciao! Saluti da Italia.'
+            prefixed = prefix + ' ' + response
+            self.post_tweet(prefixed, reply_to=tweet)
+        if most_probable == "norwegian":
+            response = u'Hei! Jeg snakker norsk.'
+            prefixed = prefix + ' ' + response
+            self.post_tweet(prefixed, reply_to=tweet)
+        if most_probable == "portuguese":
+            response = u'Olá! Você fala português?'
+            prefixed = prefix + ' ' + response
+            self.post_tweet(prefixed, reply_to=tweet)
+        if most_probable == "russian":
+            response = u'Привет! Привет из России.'
+            prefixed = prefix + ' ' + response
+            self.post_tweet(prefixed, reply_to=tweet)
+        if most_probable == "spanish":
+            response = u'¡Hola! Saludos desde España.'
+            prefixed = prefix + ' ' + response
+            self.post_tweet(prefixed, reply_to=tweet)
+        if most_probable == "swedish":
+            response = u'Hej! Talar du svenska?'
+            prefixed = prefix + ' ' + response
+            self.post_tweet(prefixed, reply_to=tweet)
+        if most_probable == "turkish":
+            response = u'Merhaba! Türkçe biliyor musun?'
+            prefixed = prefix + ' ' + response
+            self.post_tweet(prefixed, reply_to=tweet)
+        # print(percentage)
+        # print(stopwords_set)
+        # print(words_set)
+        print("The language of your text is %s" % most_probable)
+    def on_timeline(self, tweet, prefix):
+        pass
+if __name__ == '__main__':
+    bot = Idiomaggio()
+    bot.run()
+</source>
 [[File:myrobot.png|100x100px|thumb|left]]