Haskell: actual IO monad implementation, in different language?_问答_开发者

How is IO monad actually implemented?in sense of, what would be the actual implementation of the main function?

How would I call haskell function (IO) from another language and do I in that case need to maintain IO my self?

Does main pulls IO actions (Lazily) as references and then call them? Or it is interpreter job, when it found actions in its way it can call them? Or maybe something else?

Is there good IO monad implementation in different language which can help to deeply understand what happening in main function?

Edit:

Such hGetContents confuses me a lot, and makes me unsure how IO is really implemented.

Ok, let's say I have very simple pure Haskell interpreter witch unfortunately has no IO support and for curiosity I want to add this IO actions to it (unsafeIO tricks also). It is hard to get it from GHC, Hugs or others.

Here is an example of how one could implement the IO monad in Java:

package so.io;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import static so.io.IOMonad.*;  
import static so.io.ConsoleIO.*;    

/**
 * This is a type containing no data -- corresponds to () in Haskell.
 */
class Unit {
    public final static Unit VALUE = new Unit(); 
}

/**
 * This type represents a function from A to R
 */
interface Function<A,R> {
    public R apply(A argument);
}

/**
 * This type represents an action, yielding type R
 */
interface IO<R> {
    /**
     * Warning! May have arbitrary side-effects!
     */
    R unsafePerformIO();
}

/**
 * This class, internally impure, provides pure interface for action sequencing (aka Monad)
 */
class IOMonad {
    static <T> IO<T> pure(final T value) {
        return new IO<T>() {
            @Override
            public T unsafePerformIO() {
                return value;
            }
        };
    }

    static <T> IO<T> join(final IO<IO<T>> action) {
        return new IO<T>(){
            @Override
            public T unsafePerformIO() {
                return action.unsafePerformIO().unsafePerformIO();
            }
        };
    }

    static <A,B> IO<B> fmap(final Function<A,B> func, final IO<A> action) {
        return new IO<B>(){
            @Override
            public B unsafePerformIO() {
                return func.apply(action.unsafePerformIO());
            }
        };
    }

    static <A,B> IO<B> bind(IO<A> action, Function<A, IO<B>> func) {
        return join(fmap(func, action));
    }
}

/**
 * This class, internally impure, provides pure interface for interaction with stdin and stdout
 */
class ConsoleIO {
    static IO<Unit> putStrLn(final String line) {
        return new IO<Unit>() {
            @Override
            public Unit unsafePerformIO() {
                System.out.println(line);
                return Unit.VALUE;
            }
        };
    };

    // Java does not have first-class functions, thus this:
    final static Function<String, IO<Unit>> putStrLn = new Function<String, IO<Unit>>() {
        @Override
        public IO<Unit> apply(String argument) {
            return putStrLn(argument);
        }
    };

    final static BufferedReader in = new BufferedReader(new InputStreamReader(System.in));

    static IO<String> getLine = new IO<String>() {
            @Override
            public String unsafePerformIO() {
                try {
                    return in.readLine();
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
        };
}

/**
 * The program composed out of IO actions in a purely functional manner.
 */
class Main {

    /**
     * A variant of bind, which discards the bound value.
     */
    static IO<Unit> bind_(final IO<Unit> a, final IO<Unit> b) {
        return bind(a, new Function<Unit, IO<Unit>>(){
            @Override
            public IO<Unit> apply(Unit argument) {
                return b;
            }
        });
    }

    /**
     * The greeting action -- asks the user for his name and then prints a greeting
     */
    final static IO<Unit> greet = 
            bind_(putStrLn("Enter your name:"), 
            bind(getLine, new Function<String, IO<Unit>>(){
                @Override
                public IO<Unit> apply(String argument) {
                    return putStrLn("Hello, " + argument + "!");
                }
            }));

    /**
     * A simple echo action -- reads a line, prints it back
     */
    final static IO<Unit> echo = bind(getLine, putStrLn);

    /**
     * A function taking some action and producing the same action run repeatedly forever (modulo stack overflow :D)
     */
    static IO<Unit> loop(final IO<Unit> action) {
        return bind(action, new Function<Unit, IO<Unit>>(){
            @Override
            public IO<Unit> apply(Unit argument) {
                return loop(action);
            }
        });
    }

    /**
     * The action corresponding to the whole program
     */
    final static IO<Unit> main = bind_(greet, bind_(putStrLn("Entering the echo loop."),loop(echo)));
}

/**
 * The runtime system, doing impure stuff to actually run our program.
 */
public class RTS {
    public static void main(String[] args) {
        Main.main.unsafePerformIO();
    }
}

This is a runtime system implementing interface to the console I/O together with a small purely functional program which greets the user and then runs an echo loop.

One can't implement the unsafe part in Haskell because Haskell is purely functional language. It is always implemented with lower-level facilities.

With Java 8 Lambdas, you can take the code from Rotsor's answer above, remove the Function class as Java 8 provides a FunctionalInterface with does the same thing and remove the anonymous class cruft to achieve cleaner looking code like so:

package so.io;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.function.Function;

import static so.io.IOMonad.*;
import static so.io.ConsoleIO.*;

/**
 * This is a type containing no data -- corresponds to () in Haskell.
 */
class Unit {

   // -- Unit$

   public final static Unit VALUE = new Unit();

   private Unit() {
   }

}

/** This type represents an action, yielding type R */
@FunctionalInterface
interface IO<R> {

   /** Warning! May have arbitrary side-effects! */
   R unsafePerformIO();

}

/**
 * This, internally impure, provides pure interface for action sequencing (aka
 * Monad)
 */
interface IOMonad {

   // -- IOMonad$

   static <T> IO<T> pure(final T value) {
      return () -> value;
   }

   static <T> IO<T> join(final IO<IO<T>> action) {
      return () -> action.unsafePerformIO().unsafePerformIO();
   }

   static <A, B> IO<B> fmap(final Function<A, B> func, final IO<A> action) {
      return () -> func.apply(action.unsafePerformIO());
   }

   static <A, B> IO<B> bind(IO<A> action, Function<A, IO<B>> func) {
      return join(fmap(func, action));
   }

}

/**
 * This, internally impure, provides pure interface for interaction with stdin
 * and stdout
 */
interface ConsoleIO {

   // -- ConsoleIO$

   static IO<Unit> putStrLn(final String line) {
      return () -> {
         System.out.println(line);
         return Unit.VALUE;
      };
   };

   final static Function<String, IO<Unit>> putStrLn = arg -> putStrLn(arg);

   final static BufferedReader in = new BufferedReader(new InputStreamReader(System.in));

   static IO<String> getLine = () -> {
      try {
         return in.readLine();
      }

      catch (IOException e) {
         throw new RuntimeException(e);
      }
   };

}

/** The program composed out of IO actions in a purely functional manner. */
interface Main {

   // -- Main$

   /** A variant of bind, which discards the bound value. */
   static IO<Unit> bind_(final IO<Unit> a, final IO<Unit> b) {
      return bind(a, arg -> b);
   }

   /**
    * The greeting action -- asks the user for his name and then prints 
    * greeting
    */
   final static IO<Unit> greet = bind_(putStrLn("Enter your name:"),
         bind(getLine, arg -> putStrLn("Hello, " + arg + "!")));

   /** A simple echo action -- reads a line, prints it back */
   final static IO<Unit> echo = bind(getLine, putStrLn);

   /**
    * A function taking some action and producing the same action run repeatedly
    * forever (modulo stack overflow :D)
    */
   static IO<Unit> loop(final IO<Unit> action) {
      return bind(action, arg -> loop(action));
   }

    /** The action corresponding to the whole program */
    final static IO<Unit> main = bind_(greet, bind_(putStrLn("Entering the echo loop."), loop(echo)));

}

/** The runtime system, doing impure stuff to actually run our program. */
public interface RTS {

    // -- RTS$

    public static void main(String[] args) {
       Main.main.unsafePerformIO();
    }

 }

Note that I also changed class-declared static methods to interface-declared static methods. Why? No particular reason, just that you can in Java 8.

If you want to understand the implementation of the IO monad, it's very well described in an award-winning paper by Phil Wadler and Simon Peyton Jones, who were the ones who figured out how to use monads to do input/output in a pure language. The paper is Imperative Functional Programming and is on both authors' web sites.

The IO monad is basically implemented as a state transformer (similar to State), with a special token RealWorld. Each IO operation depends on this token and passes it when it finishes. unsafeInterleaveIO introduces a second token, so that a new IO operation can start, while the other one is still doing its work.

Usually, you don't have to care about the implementation. If you want to call IO-functions from other languages, GHC cares about removing the IO wrapper. Consider this small snippet:

printInt :: Int -> IO ()
printInt int = do putStr "The argument is: "
                  print int

foreign export ccall printInt :: Int -> IO ()

This generates a symbol to call printInt from C. The function becomes:

extern void printInt(HsInt a1);

Where HsInt is just a (depending on your platform) typedefd int. So you see, the monad IO has been removed completely.

Below is the actual implementation of `IO` in GHC 7.10.

The IO type is essentially a state monad at type State# RealWorld (defined in GHC.Types):

{- |
A value of type @'IO' a@ is a computation which, when performed,
does some I\/O before returning a value of type @a@.
There is really only one way to \"perform\" an I\/O action: bind it to
@Main.main@ in your program.  When your program is run, the I\/O will
be performed.  It isn't possible to perform I\/O from an arbitrary
function, unless that function is itself in the 'IO' monad and called
at some point, directly or indirectly, from @Main.main@.
'IO' is a monad, so 'IO' actions can be combined using either the do-notation
or the '>>' and '>>=' operations from the 'Monad' class.
-}
newtype IO a = IO (State# RealWorld -> (# State# RealWorld, a #))

The IO monad is strict, because bindIO is defined by case matching (defined in GHC.Base):

instance  Monad IO  where
    {-# INLINE return #-}
    {-# INLINE (>>)   #-}
    {-# INLINE (>>=)  #-}
    m >> k    = m >>= \ _ -> k
    return    = returnIO
    (>>=)     = bindIO
    fail s    = failIO s

returnIO :: a -> IO a
returnIO x = IO $ \ s -> (# s, x #)

bindIO :: IO a -> (a -> IO b) -> IO b
bindIO (IO m) k = IO $ \ s -> case m s of (# new_s, a #) -> unIO (k a) new_s

This implementation is discussed in a blog post by Edward Yang.

I will leave the question of implementing IO to other people who know a bit more. (Though I will point out, as I'm sure they will as well, that the real question isn't "How is IO implemented in Haskell?" but rather "How is IO implemented in GHC?" or "How is IO implemented in Hugs?", etc. I imagine the implementations vary enormously.) However, this question:

how to call haskell function (IO) from another language and do I in that case need to maintain IO my self?

...is answered in depth in the FFI specification.

In fact "IO a" is just "() -> a" in an impure language (where functions can have side effect). Let's say you want to implement IO in SML :

structure Io : MONAD =
struct
  type 'a t = unit -> 'a
  return x = fn () => x
  fun (ma >>= g) () = let a = ma ()
                      in g a ()
  executeIo ma = ma ()
end

Note: my experience with Clean is minimal - consider yourself warned!

Based on System.IO, using a variation of an approach described by F. Warren Burton:

definition module System.Alt.IO

from Control.Applicative import class pure, class <*>, class Applicative
from Data.Functor import class Functor
from Control.Monad import class Monad
from StdOverloaded import class toString
from System._OI import OI

:: IO a = IO .(*OI -> a)

execIO :: !(IO a) !*World -> *World

evalIO :: !(IO a) !*World -> *(a, !*World)

withOI :: (*OI -> .a) -> IO .a

putStr :: String -> IO ()

putStrLn :: String -> IO ()

print :: a -> IO () | toString a

getChar :: IO Char

getLine :: IO String

readFileM :: !String -> IO String

writeFileM :: !String !String -> IO ()

instance Functor IO
instance pure IO
instance <*> IO
instance Monad IO

unsafePerformIO :: !(*OI -> .a) -> .a
unsafePerformIOTrue :: !(*OI -> a) -> Bool

implementation module System.Alt.IO

import StdFile
from StdFunc import o, id
import StdMisc
import StdString

import System._OI
import Control.Applicative
import Control.Monad
import Data.Functor
from Text import class Text (trim), instance Text String

execIO :: !(IO a) !*World -> *World
execIO (IO g) world
  #  (u, world) = newOI world
  #! x = g u
  =  world

evalIO :: !(IO a) !*World -> *(a, !*World)
evalIO (IO g) world
  #  (u, world) = newOI world
  #! x = g u
  =  (x, world)

withWorld :: (*World -> *(.a, *World)) -> IO .a
withWorld f = IO g
where
    g u
      # (x, world) = f (getWorld u)
      = from_world "withWorld" x world

instance Functor IO
where
    fmap f x = x >>= (lift o f)

instance pure IO
where
    pure x     = IO (\u -> case partOI u of (_, _) = x)

instance <*> IO
where
    (<*>) f g  = liftA2 id f g

instance Monad IO where
  bind ma a2mb = IO (run ma)
    where
    run (IO g) u
      #  (u1, u2) = partOI u
      #! x        = g u1
      #  (IO k)   = a2mb x
      = k u2

putStr :: String -> IO ()
putStr str = withWorld f
  where
  f world
    # (out, world) = stdio world
    # out          = fwrites str out
    # (_, world)   = fclose out world
    = ((), world)

putStrLn :: String -> IO ()
putStrLn str = putStr (str +++ "\n")

print :: a -> IO () | toString a
print x = putStrLn (toString x)

getChar :: IO Char
getChar = withWorld f
  where
  f world
    # (input, world) = stdio world
    # (ok, c, input) = freadc input
    # (_, world)     = fclose input world
    = (c, world)

getLine :: IO String
getLine = withWorld f
  where
  f world
    # (input, world) = stdio world
    # (str, input)   = freadline input
    # (_, world)     = fclose input world
    = (trim str, world)

readFileM :: !String -> IO String
readFileM name = withWorld f
  where
  f world
    # (ok, file, world) = fopen name FReadText world
    # (str, file)       = freads file 16777216
    # (ok, world)       = fclose file world
    = (str, world)

writeFileM :: !String !String -> IO ()
writeFileM name txt = withWorld f
  where
  f world
    # (ok, file, world) = fopen name FWriteText world
    # file              = fwrites txt file
    # (ok, world)       = fclose file world
    = ((), world)

unsafePerformIO :: !(*OI -> .a) -> .a
unsafePerformIO f
  #! x = f make_oi
  =  x

unsafePerformIOTrue :: !(*OI -> a) -> Bool
unsafePerformIOTrue f
  #! x = f make_oi
  =  True

make_oi
  # (u, w) = newOI make_world
  = from_world "make_oi" u w

from_world :: String a !*World -> a
from_world name x world
  | world_to_true world = x
  | otherwise           = abort ("error in +++ name +++ "\n") 

world_to_true :: !*World -> Bool
world_to_true world
  = code inline {
    pop_a 1
    pushB TRUE
}

make_world
  = code {
    fillI 65536 0
  }

definition module System._OI

from StdFunc import id

:: OI
partOI :: *OI -> *(*OI, *OI)

newOI :: *World -> *(*OI, *World)
getWorld :: *OI -> *World

implementation module System._OI

:: OI = OI

partOI :: !*OI -> *(*OI, *OI)           // NOTE - this may need
partOI u                                // to be defined with
  # u1 = OI                             // ABC instructions
  # u2 = id OI
  = (u1, u2)

newOI :: !*World -> *(*OI, *World)      // ...or:  Start :: *OI -> ()
newOI world
  = (OI, world)

getWorld :: !*OI -> *World              // only needed because I/O
getWorld OI                             // operations in Clean 3.0
  = code inline { fillA 65536 0 }       // rely on World values